From 4f05267418d024fc2bd544b9701478df4491eba5 Mon Sep 17 00:00:00 2001
From: moisesh
Date: Wed, 15 Apr 2020 23:36:20 -0700
Subject: [PATCH 1/6] Add LeakyReLU:Gelu (fwd and bwd) to fused ops

---
 src/executor/pointwise_fusion_pass.cc | 14 +++++++++++
 src/operator/fusion/fused_op-inl.h    | 22 ++++++++++++++++++
 src/operator/fusion/fused_op.cu       | 34 +++++++++++++++++++++++++++
 3 files changed, 70 insertions(+)

diff --git a/src/executor/pointwise_fusion_pass.cc b/src/executor/pointwise_fusion_pass.cc
index 5db9706b4f99..43b513d51156 100644
--- a/src/executor/pointwise_fusion_pass.cc
+++ b/src/executor/pointwise_fusion_pass.cc
@@ -71,6 +71,20 @@ namespace {
                      op_name) != variable_io_ops.end())
       return true;
 
+    if (op_name == "LeakyReLU"){
+      std::string act_type = n->attrs.dict.at("act_type");
+      if (LeakyReLU_ops.count(act_type))
+        return true;
+      else
+        return false;
+    }
+    if (op_name == "_backward_LeakyReLU"){
+      std::string act_type = n->attrs.dict.at("act_type");
+      if (LeakyReLU_bwd_ops.count(act_type))
+        return true;
+      else
+        return false;
+    }
     return false;
   }
 
diff --git a/src/operator/fusion/fused_op-inl.h b/src/operator/fusion/fused_op-inl.h
index e45569f9dbf6..d0e5e7d9b36f 100644
--- a/src/operator/fusion/fused_op-inl.h
+++ b/src/operator/fusion/fused_op-inl.h
@@ -224,6 +224,15 @@ const std::map<std::string, std::vector<std::vector<std::string>>> ops_desc = {
                                      {"(% * % / op::hypot(%, %))", "_0", "_2", "_1", "_2"}}}
 };
 
+// LeakyReLU ops: based on "act_type" attribute
+const std::map<std::string, std::vector<std::vector<std::string>>> LeakyReLU_ops = {
+  {"gelu" , {{"op::gelu(%)", "_0"}}},
+};
+const std::map<std::string, std::vector<std::vector<std::string>>> LeakyReLU_bwd_ops = {
+  {"gelu" , {{"op::backward_gelu(%, %)", "_1", "_0"}}},
+};
+
+
 const std::map<std::string, std::string> slice_ops = {
   {"slice_axis" , ""},
   {"slice" , ""},
@@ -543,6 +552,13 @@ __device__ inline DType relu(const DType val) {
   return val > 0 ? val : 0;
 }
 
+__constant__ const float SQRT_2 = 1.4142135623730950488016887242096;
+template <typename DType>
+__device__ inline DType gelu(const DType val) {
+  return DType(0.5f * static_cast<float>(val) *
+               (1.0f + erf(static_cast<float>(val) / SQRT_2)));
+}
+
 template <typename DType>
 __device__ inline DType sigmoid(const DType val) {
   return 1.f/(1 + expf(-val));
@@ -987,6 +1003,12 @@ __device__ inline DTypeGrad backward_smooth_l1(const DType val, const DType2 sca
   }
 }
 
+template <typename DType, typename DTypeGrad>
+__device__ inline DTypeGrad backward_gelu(const DType val, const DTypeGrad grad) {
+  return grad * DType(0.5f * (1.0f + erf(static_cast<float>(val) / SQRT_2) +
+                static_cast<float>(val) * backward_erf(static_cast<float>(val) / SQRT_2, 1.0f) / SQRT_2));
+}
+
 }  // namespace op
 
 )code";
diff --git a/src/operator/fusion/fused_op.cu b/src/operator/fusion/fused_op.cu
index 00887240aa56..ec930916027b 100644
--- a/src/operator/fusion/fused_op.cu
+++ b/src/operator/fusion/fused_op.cu
@@ -460,6 +460,40 @@ std::string FusedOp::GenerateCode(const std::vector<OpReqType> &req,
           continue;
         }
 
+        // LeakyReLU, look for act_type
+        if (op_name == "LeakyReLU") {
+          std::string act_type = node.source->attrs.dict.at("act_type");
+          const std::vector<std::vector<std::string>>& op_descs = fusion::LeakyReLU_ops.at(act_type);
+          if (fusion::LeakyReLU_ops.find(act_type) != fusion::LeakyReLU_ops.end()) {
+            CHECK_EQ(outputs[i], op_descs.size());
+            size_t count = 0;
+            for (const auto& op_desc : op_descs) {
+              var_name = "temp" + std::to_string(temp_name_counter++);
+              const std::string& fmt = ParseOpDescription(op_desc, variables, node);
+              code += "const auto " + var_name + " = " + fmt + ";\n";
+              variables[{i, count}] = var_name;
+              ++count;
+            }
+            continue;
+          }
+        }
+        if (op_name == "_backward_LeakyReLU") {
+          std::string act_type = node.source->attrs.dict.at("act_type");
+          const std::vector<std::vector<std::string>>& op_descs = fusion::LeakyReLU_bwd_ops.at(act_type);
+          if (fusion::LeakyReLU_bwd_ops.find(act_type) != fusion::LeakyReLU_bwd_ops.end()) {
+            CHECK_EQ(outputs[i], op_descs.size());
+            size_t count = 0;
+            for (const auto& op_desc : op_descs) {
+              var_name = "temp" + std::to_string(temp_name_counter++);
+              const std::string& fmt = ParseOpDescription(op_desc, variables, node);
+              code += "const auto " + var_name + " = " + fmt + ";\n";
+              variables[{i, count}] = var_name;
+              ++count;
+            }
+            continue;
+          }
+        }
+
         LOG(FATAL) << "Unrecognized op " + op_name;
       }
     } else {
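
The new op::gelu / op::backward_gelu device functions encode GELU(x) = 0.5 * x * (1 + erf(x / sqrt(2))) and its analytic derivative. As a quick sanity check of that derivative (a standalone sketch, not part of the patch; it re-implements the two device functions in plain Python and assumes backward_erf(v, 1) is the usual 2/sqrt(pi) * exp(-v*v) term from the fused-op header):

    import math

    SQRT_2 = math.sqrt(2.0)

    def gelu(x):
        # mirrors op::gelu: 0.5 * x * (1 + erf(x / sqrt(2)))
        return 0.5 * x * (1.0 + math.erf(x / SQRT_2))

    def backward_gelu(x, grad=1.0):
        # mirrors op::backward_gelu, with backward_erf(v, 1) = 2/sqrt(pi) * exp(-v*v)
        d_erf = 2.0 / math.sqrt(math.pi) * math.exp(-(x / SQRT_2) ** 2)
        return grad * 0.5 * (1.0 + math.erf(x / SQRT_2) + x * d_erf / SQRT_2)

    # central finite differences should agree with the closed-form derivative
    eps = 1e-6
    for x in (-2.0, -0.5, 0.0, 0.7, 3.0):
        fd = (gelu(x + eps) - gelu(x - eps)) / (2 * eps)
        assert abs(fd - backward_gelu(x)) < 1e-5, (x, fd, backward_gelu(x))
    print("backward_gelu matches finite differences")
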
From 3df43807e83dc6496e6b02b008052776790bbf91 Mon Sep 17 00:00:00 2001
From: moisesh
Date: Wed, 15 Apr 2020 23:55:35 -0700
Subject: [PATCH 2/6] Add test LeakyReLU:gelu

---
 tests/python/gpu/test_fusion.py | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/tests/python/gpu/test_fusion.py b/tests/python/gpu/test_fusion.py
index 8e0f063b73ba..a6be6c7d6629 100644
--- a/tests/python/gpu/test_fusion.py
+++ b/tests/python/gpu/test_fusion.py
@@ -230,11 +230,24 @@ def check_other_ops():
     arr2 = mx.random.uniform(shape=(2,2,2,3))
     check_fused_symbol(mx.sym.broadcast_like(a, b, lhs_axes=[0], rhs_axes=[0]), a=arr1, b=arr2)
 
+def check_leakyrelu_ops():
+    a = mx.sym.Variable('a')
+    b = mx.sym.Variable('b')
+    shape = rand_shape_2d()
+    arr1 = mx.random.uniform(shape=shape)
+    arr2 = mx.random.uniform(shape=shape)
+
+    # Testing gelu
+    print("Checking fusion of LeakyReLU:gelu")
+    check_fused_symbol(mx.sym.LeakyReLU(a+b, act_type='gelu'), a=arr1, b=arr2)
+
+
 @with_seed()
 def test_fusion():
     check_unary_ops()
     check_binary_ops()
     check_other_ops()
+    check_leakyrelu_ops()
 
 @with_seed()
 def test_fusion_compiler_cache():

From 684eb1583f21d9f1fed5500a017ddef79e8018a1 Mon Sep 17 00:00:00 2001
From: moisesh
Date: Thu, 16 Apr 2020 00:20:38 -0700
Subject: [PATCH 3/6] cpplint

---
 src/executor/pointwise_fusion_pass.cc | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/executor/pointwise_fusion_pass.cc b/src/executor/pointwise_fusion_pass.cc
index 43b513d51156..3203f67e8b68 100644
--- a/src/executor/pointwise_fusion_pass.cc
+++ b/src/executor/pointwise_fusion_pass.cc
@@ -71,14 +71,14 @@ namespace {
                      op_name) != variable_io_ops.end())
       return true;
 
-    if (op_name == "LeakyReLU"){
+    if (op_name == "LeakyReLU") {
       std::string act_type = n->attrs.dict.at("act_type");
       if (LeakyReLU_ops.count(act_type))
         return true;
       else
         return false;
     }
-    if (op_name == "_backward_LeakyReLU"){
+    if (op_name == "_backward_LeakyReLU") {
       std::string act_type = n->attrs.dict.at("act_type");
       if (LeakyReLU_bwd_ops.count(act_type))
         return true;
       else
         return false;

From b8a5d346a9272215d8ebcb802a14801ec10a2931 Mon Sep 17 00:00:00 2001
From: moisesh
Date: Thu, 16 Apr 2020 10:03:25 -0700
Subject: [PATCH 4/6] fix lint

---
 src/operator/fusion/fused_op.cu | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/src/operator/fusion/fused_op.cu b/src/operator/fusion/fused_op.cu
index ec930916027b..3d7caab2fb31 100644
--- a/src/operator/fusion/fused_op.cu
+++ b/src/operator/fusion/fused_op.cu
@@ -463,7 +463,8 @@ std::string FusedOp::GenerateCode(const std::vector<OpReqType> &req,
         // LeakyReLU, look for act_type
         if (op_name == "LeakyReLU") {
           std::string act_type = node.source->attrs.dict.at("act_type");
-          const std::vector<std::vector<std::string>>& op_descs = fusion::LeakyReLU_ops.at(act_type);
+          const std::vector<std::vector<std::string>>& op_descs =
+              fusion::LeakyReLU_ops.at(act_type);
           if (fusion::LeakyReLU_ops.find(act_type) != fusion::LeakyReLU_ops.end()) {
             CHECK_EQ(outputs[i], op_descs.size());
             size_t count = 0;
@@ -479,7 +480,8 @@ std::string FusedOp::GenerateCode(const std::vector<OpReqType> &req,
         }
         if (op_name == "_backward_LeakyReLU") {
           std::string act_type = node.source->attrs.dict.at("act_type");
-          const std::vector<std::vector<std::string>>& op_descs = fusion::LeakyReLU_bwd_ops.at(act_type);
+          const std::vector<std::vector<std::string>>& op_descs =
+              fusion::LeakyReLU_bwd_ops.at(act_type);
           if (fusion::LeakyReLU_bwd_ops.find(act_type) != fusion::LeakyReLU_bwd_ops.end()) {
             CHECK_EQ(outputs[i], op_descs.size());
             size_t count = 0;
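
The new test exercises the fused path through check_fused_symbol like the other fusion tests. For readers who want to poke at the operator directly, the identity the test relies on can also be checked imperatively (an illustrative sketch, not part of the patch; it uses the plain NDArray API, which does not go through the fusion pass, and assumes a GPU context is available):

    import math
    import mxnet as mx

    ctx = mx.gpu(0)
    x = mx.nd.random.uniform(-3, 3, shape=(4, 5), ctx=ctx)

    # LeakyReLU with act_type='gelu' should match the explicit GELU formula
    y = mx.nd.LeakyReLU(x, act_type='gelu')
    ref = 0.5 * x * (1.0 + mx.nd.erf(x / math.sqrt(2.0)))
    print("max abs diff:", (y - ref).abs().max().asscalar())

When the same expression is built symbolically and bound on a GPU, the pointwise fusion pass routes it through the generated op::gelu kernel; fusion itself can be switched off for an A/B comparison via the MXNET_USE_FUSION environment variable.
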
From 27e75c3d9e91494bf3d3d10cec5c592b9916dd7f Mon Sep 17 00:00:00 2001
From: moisesh
Date: Thu, 16 Apr 2020 10:56:27 -0700
Subject: [PATCH 5/6] fix bug SQRT_2 using constant memory

---
 src/operator/fusion/fused_op-inl.h | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/src/operator/fusion/fused_op-inl.h b/src/operator/fusion/fused_op-inl.h
index d0e5e7d9b36f..2152d161d6f4 100644
--- a/src/operator/fusion/fused_op-inl.h
+++ b/src/operator/fusion/fused_op-inl.h
@@ -232,7 +232,6 @@ const std::map<std::string, std::vector<std::vector<std::string>>> LeakyReLU_bwd
   {"gelu" , {{"op::backward_gelu(%, %)", "_1", "_0"}}},
 };
 
-
 const std::map<std::string, std::string> slice_ops = {
   {"slice_axis" , ""},
   {"slice" , ""},
@@ -551,7 +551,7 @@ __device__ inline DType relu(const DType val) {
   return val > 0 ? val : 0;
 }
 
-__constant__ const float SQRT_2 = 1.4142135623730950488016887242096;
+const float SQRT_2 = 1.4142135623730950488016887242096;
 template <typename DType>
 __device__ inline DType gelu(const DType val) {
   return DType(0.5f * static_cast<float>(val) *

From 70d8cda7b5ecc1145a988ffa5b3a0fbe47bbbfb1 Mon Sep 17 00:00:00 2001
From: moisesh
Date: Thu, 16 Apr 2020 11:01:48 -0700
Subject: [PATCH 6/6] add comments

---
 src/operator/fusion/fused_op-inl.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/operator/fusion/fused_op-inl.h b/src/operator/fusion/fused_op-inl.h
index 2152d161d6f4..0b10f821d8e1 100644
--- a/src/operator/fusion/fused_op-inl.h
+++ b/src/operator/fusion/fused_op-inl.h
@@ -552,6 +552,7 @@ __device__ inline DType relu(const DType val) {
 }
 
 const float SQRT_2 = 1.4142135623730950488016887242096;
+// compatible with mshadow_op.h version
 template <typename DType>
 __device__ inline DType gelu(const DType val) {
   return DType(0.5f * static_cast<float>(val) *
@@ -1002,6 +1003,7 @@ __device__ inline DTypeGrad backward_smooth_l1(const DType val, const DType2 sca
   }
 }
 
+// compatible with mshadow_op.h version
 template <typename DType, typename DTypeGrad>
 __device__ inline DTypeGrad backward_gelu(const DType val, const DTypeGrad grad) {
   return grad * DType(0.5f * (1.0f + erf(static_cast<float>(val) / SQRT_2) +
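
With the series applied, LeakyReLU with act_type='gelu' participates in pointwise fusion for both the forward and the backward pass. A small autograd-based reference for the gradient formula that backward_gelu encodes (again an illustrative sketch, not part of the patch; it runs the regular imperative operator purely as a numerical baseline and assumes a GPU context is available):

    import math
    import mxnet as mx

    ctx = mx.gpu(0)
    x = mx.nd.random.uniform(-3, 3, shape=(4, 5), ctx=ctx)
    x.attach_grad()

    with mx.autograd.record():
        y = mx.nd.LeakyReLU(x, act_type='gelu')
    y.backward(mx.nd.ones_like(y))

    # d/dx GELU(x) = 0.5 * (1 + erf(x / sqrt(2))) + x * exp(-x^2 / 2) / sqrt(2 * pi)
    ref = (0.5 * (1.0 + mx.nd.erf(x / math.sqrt(2.0)))
           + x * mx.nd.exp(-0.5 * x * x) / math.sqrt(2.0 * math.pi))
    print("max abs grad diff:", (x.grad - ref).abs().max().asscalar())
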