This repository has been archived by the owner on Nov 17, 2023. It is now read-only.

Support boolean elemwise/broadcast binary add, multiply and true_divide #16728

Merged · 3 commits · Nov 11, 2019
22 changes: 17 additions & 5 deletions src/operator/mshadow_op.h
@@ -97,6 +97,18 @@ using std::is_integral;
} \
}

#define MXNET_BINARY_MATH_OP_NC_WITH_BOOL(name, expr) \
struct name : public mxnet_op::tunable { \
template<typename DType, \
typename std::enable_if<!std::is_same<DType, bool>::value, int>::type = 0> \
MSHADOW_XINLINE static DType Map(DType a, DType b) { \
return (expr); \
} \
MSHADOW_XINLINE static bool Map(bool a, bool b) { \
return (expr); \
} \
}

#define MXNET_BINARY_LOGIC_OP_NC(name, expr) \
struct name : public mxnet_op::tunable { \
template<typename DType> \
@@ -192,8 +204,6 @@ MXNET_BINARY_MATH_OP_NC(left, a);

MXNET_BINARY_MATH_OP_NC(right, b);

MXNET_BINARY_MATH_OP_NC(mul, a * b);

#ifndef _WIN32
struct mixed_plus {
template<typename DType,
@@ -288,11 +298,13 @@ struct mixed_mul {
};
#endif

MXNET_BINARY_MATH_OP_NC(div, a / b);
MXNET_BINARY_MATH_OP_NC_WITH_BOOL(mul, a * b);

MXNET_BINARY_MATH_OP_NC_WITH_BOOL(div, a / b);

MXNET_BINARY_MATH_OP_NC(plus, a + b);
MXNET_BINARY_MATH_OP_NC_WITH_BOOL(plus, a + b);

MXNET_BINARY_MATH_OP_NC(minus, a - b);
MXNET_BINARY_MATH_OP_NC_WITH_BOOL(minus, a - b);

MXNET_UNARY_MATH_OP(negation, -a);

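The new MXNET_BINARY_MATH_OP_NC_WITH_BOOL macro pairs a SFINAE-disabled template with a plain bool overload, so bool arguments bind to the non-template Map and the result stays bool instead of promoting to int. A minimal standalone sketch of the pattern (assumptions: no mxnet_op::tunable base, no MSHADOW_XINLINE, mul only):

```cpp
#include <iostream>
#include <type_traits>

struct mul {
  // Template overload: SFINAE-disabled when DType is bool.
  template <typename DType,
            typename std::enable_if<!std::is_same<DType, bool>::value,
                                    int>::type = 0>
  static DType Map(DType a, DType b) {
    return a * b;
  }
  // Non-template overload: selected for bool, keeps the result type bool.
  static bool Map(bool a, bool b) {
    return a * b;  // bools promote to int for '*'; result converts back
  }
};

int main() {
  std::cout << mul::Map(3, 4) << '\n';                          // 12 (int)
  std::cout << std::boolalpha << mul::Map(true, true) << '\n';  // true (bool)
}
```

Without the extra overload, mul::Map(true, true) would have no viable candidate once the template is disabled for bool; with it, boolean tensors get a well-defined kernel.
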
29 changes: 13 additions & 16 deletions src/operator/numpy/np_elemwise_broadcast_op.cc
@@ -70,7 +70,7 @@ bool NumpyBinaryMixedPrecisionType(const nnvm::NodeAttrs& attrs,
return true;
}

#ifdef _WIN32
#ifndef _WIN32
#define MXNET_OPERATOR_REGISTER_NP_BINARY_MIXED_PRECISION(name) \
NNVM_REGISTER_OP(name) \
.set_num_inputs(2) \
@@ -85,10 +85,6 @@ bool NumpyBinaryMixedPrecisionType(const nnvm::NodeAttrs& attrs,
[](const NodeAttrs& attrs){ \
return std::vector<std::pair<int, int> >{{0, 0}, {1, 0}}; \
}) \
.set_attr<FResourceRequest>("FResourceRequest", \
[](const NodeAttrs& attrs) { \
return std::vector<ResourceRequest>{ResourceRequest::kTempSpace}; \
}) \
.add_argument("lhs", "NDArray-or-Symbol", "First input to the function") \
.add_argument("rhs", "NDArray-or-Symbol", "Second input to the function")
#else
@@ -106,6 +102,10 @@ bool NumpyBinaryMixedPrecisionType(const nnvm::NodeAttrs& attrs,
[](const NodeAttrs& attrs){ \
return std::vector<std::pair<int, int> >{{0, 0}, {1, 0}}; \
}) \
.set_attr<FResourceRequest>("FResourceRequest", \
[](const NodeAttrs& attrs) { \
return std::vector<ResourceRequest>{ResourceRequest::kTempSpace}; \
}) \
.add_argument("lhs", "NDArray-or-Symbol", "First input to the function") \
.add_argument("rhs", "NDArray-or-Symbol", "Second input to the function")
#endif
@@ -114,41 +114,38 @@ MXNET_OPERATOR_REGISTER_NP_BINARY_MIXED_PRECISION(_npi_add)
#ifndef _WIN32
.set_attr<FCompute>(
"FCompute<cpu>",
MixedBinaryBroadcastCompute<cpu, op::mshadow_op::plus, op::mshadow_op::mixed_plus,
op::mshadow_op::mixed_plus>)
NumpyBinaryBroadcastComputeWithBool<cpu, op::mshadow_op::plus, op::mshadow_op::mixed_plus,
op::mshadow_op::mixed_plus>)
#else
.set_attr<FCompute>(
"FCompute<cpu>",
MixedBinaryBroadcastCompute<cpu, op::mshadow_op::plus, op::mshadow_op::plus,
op::mshadow_op::plus>)
NumpyBinaryBroadcastComputeWithBool<cpu, op::mshadow_op::plus>)
#endif
.set_attr<nnvm::FGradient>("FGradient", ElemwiseGradUseNone{"_backward_broadcast_add"});

MXNET_OPERATOR_REGISTER_NP_BINARY_MIXED_PRECISION(_npi_subtract)
#ifndef _WIN32
.set_attr<FCompute>(
"FCompute<cpu>",
MixedBinaryBroadcastCompute<cpu, op::mshadow_op::minus, op::mshadow_op::mixed_minus,
NumpyBinaryBroadcastCompute<cpu, op::mshadow_op::minus, op::mshadow_op::mixed_minus,
op::mshadow_op::mixed_rminus>)
#else
.set_attr<FCompute>(
"FCompute<cpu>",
MixedBinaryBroadcastCompute<cpu, op::mshadow_op::minus, op::mshadow_op::minus,
op::mshadow_op::minus>)
NumpyBinaryBroadcastCompute<cpu, op::mshadow_op::minus>)
#endif
.set_attr<nnvm::FGradient>("FGradient", ElemwiseGradUseNone{"_backward_broadcast_sub"});

MXNET_OPERATOR_REGISTER_NP_BINARY_MIXED_PRECISION(_npi_multiply)
#ifndef _WIN32
.set_attr<FCompute>(
"FCompute<cpu>",
MixedBinaryBroadcastCompute<cpu, op::mshadow_op::mul, op::mshadow_op::mixed_mul,
op::mshadow_op::mixed_mul>)
NumpyBinaryBroadcastComputeWithBool<cpu, op::mshadow_op::mul, op::mshadow_op::mixed_mul,
op::mshadow_op::mixed_mul>)
#else
.set_attr<FCompute>(
"FCompute<cpu>",
MixedBinaryBroadcastCompute<cpu, op::mshadow_op::mul, op::mshadow_op::mul,
op::mshadow_op::mul>)
NumpyBinaryBroadcastComputeWithBool<cpu, op::mshadow_op::mul>)
#endif
.set_attr<nnvm::FGradient>("FGradient", ElemwiseGradUseIn{"_backward_broadcast_mul"});

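The guard on the first registration macro is also corrected from #ifdef _WIN32 to #ifndef _WIN32: the non-Windows build, which compiles the mixed-precision LOP/ROP kernels, no longer requests temp space, while the Windows build keeps the kTempSpace request. A sketch of the kind of fallback that workspace supports (assumptions: standalone, std::vector standing in for the requested workspace, hypothetical CastToTemp helper):

```cpp
#include <cstdint>
#include <vector>

// Hypothetical helper: cast one input into temporary space so the
// homogeneous kernel can run on a single dtype.
template <typename OutT, typename InT>
std::vector<OutT> CastToTemp(const std::vector<InT>& in) {
  return std::vector<OutT>(in.begin(), in.end());  // element-wise cast
}

int main() {
  std::vector<int32_t> lhs = {1, 2, 3};
  std::vector<float> rhs = {0.5f, 1.5f, 2.5f};
  // Common output dtype is float: cast lhs, then run the plain mul kernel.
  std::vector<float> lhs_cast = CastToTemp<float>(lhs);
  std::vector<float> out(lhs_cast.size());
  for (size_t i = 0; i < out.size(); ++i) out[i] = lhs_cast[i] * rhs[i];
  return out[0] == 0.5f ? 0 : 1;  // 1 * 0.5f == 0.5f
}
```
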
19 changes: 8 additions & 11 deletions src/operator/numpy/np_elemwise_broadcast_op.cu
@@ -32,39 +32,36 @@ NNVM_REGISTER_OP(_npi_add)
#ifndef _WIN32
.set_attr<FCompute>(
"FCompute<gpu>",
MixedBinaryBroadcastCompute<gpu, op::mshadow_op::plus, op::mshadow_op::mixed_plus,
op::mshadow_op::mixed_plus>);
NumpyBinaryBroadcastComputeWithBool<gpu, op::mshadow_op::plus, op::mshadow_op::mixed_plus,
op::mshadow_op::mixed_plus>);
#else
.set_attr<FCompute>(
"FCompute<gpu>",
MixedBinaryBroadcastCompute<gpu, op::mshadow_op::plus, op::mshadow_op::plus,
op::mshadow_op::plus>);
NumpyBinaryBroadcastComputeWithBool<gpu, op::mshadow_op::plus>);
#endif

NNVM_REGISTER_OP(_npi_subtract)
#ifndef _WIN32
.set_attr<FCompute>(
"FCompute<gpu>",
MixedBinaryBroadcastCompute<gpu, op::mshadow_op::minus, op::mshadow_op::mixed_minus,
NumpyBinaryBroadcastCompute<gpu, op::mshadow_op::minus, op::mshadow_op::mixed_minus,
op::mshadow_op::mixed_rminus>);
#else
.set_attr<FCompute>(
"FCompute<gpu>",
MixedBinaryBroadcastCompute<gpu, op::mshadow_op::minus, op::mshadow_op::minus,
op::mshadow_op::minus>);
NumpyBinaryBroadcastCompute<gpu, op::mshadow_op::minus>);
#endif

NNVM_REGISTER_OP(_npi_multiply)
#ifndef _WIN32
.set_attr<FCompute>(
"FCompute<gpu>",
MixedBinaryBroadcastCompute<gpu, op::mshadow_op::mul, op::mshadow_op::mixed_mul,
op::mshadow_op::mixed_mul>);
NumpyBinaryBroadcastComputeWithBool<gpu, op::mshadow_op::mul, op::mshadow_op::mixed_mul,
op::mshadow_op::mixed_mul>);
#else
.set_attr<FCompute>(
"FCompute<gpu>",
MixedBinaryBroadcastCompute<gpu, op::mshadow_op::mul, op::mshadow_op::mul,
op::mshadow_op::mul>);
NumpyBinaryBroadcastComputeWithBool<gpu, op::mshadow_op::mul>);
#endif

NNVM_REGISTER_OP(_npi_mod)
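
The .cu registrations mirror the .cc ones because each compute entry point is templated on a device tag; only the xpu argument changes between the two files. A reduced sketch of that tag dispatch (assumptions: standalone, name() strings in place of real stream handling):

```cpp
#include <cstdio>

struct cpu { static const char* name() { return "cpu"; } };
struct gpu { static const char* name() { return "gpu"; } };

struct mul_op {};  // stand-in for op::mshadow_op::mul

// One template serves both devices; the tag picks the launch backend.
template <typename xpu, typename OP>
void BroadcastCompute(int n) {
  std::printf("launching %s kernel over %d elements\n", xpu::name(), n);
}

int main() {
  BroadcastCompute<cpu, mul_op>(16);  // what the .cc file registers
  BroadcastCompute<gpu, mul_op>(16);  // what the .cu file registers
}
```
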
81 changes: 72 additions & 9 deletions src/operator/numpy/np_elemwise_broadcast_op.h
@@ -34,8 +34,8 @@
namespace mxnet {
namespace op {

inline void PrintErrorMessage(const std::string& name, const int dtype1, const int dtype2) {
LOG(FATAL) << "Operator " << name << " does not support combination of "
inline void PrintErrorMessage(const std::string& op_name, const int dtype1, const int dtype2) {
LOG(FATAL) << "Operator " << op_name << " does not support combination of "
<< common::dtype_string(dtype1) << " with " << common::dtype_string(dtype2)
<< " yet...";
}
@@ -218,7 +218,11 @@ void MixedAllRealBinaryBroadcastCompute(const std::string& op_name,
}
#endif

#ifndef _WIN32
template<typename xpu, typename OP, typename LOP, typename ROP>
#else
template<typename xpu, typename OP>
#endif
void MixedBinaryBroadcastCompute(const nnvm::NodeAttrs& attrs,
const OpContext& ctx,
const std::vector<TBlob>& inputs,
@@ -233,13 +237,6 @@ void MixedBinaryBroadcastCompute(const nnvm::NodeAttrs& attrs,
const TBlob& rhs = inputs[1];
const TBlob& out = outputs[0];

if ((out.shape_.Size() == 0U) || (req[0] == kNullOp)) return;

if (lhs.type_flag_ == rhs.type_flag_) {
BinaryBroadcastCompute<xpu, OP>(attrs, ctx, inputs, req, outputs);
return;
}

#ifndef _WIN32
mxnet::TShape new_lshape, new_rshape, new_oshape;
int ndim = BinaryBroadcastShapeCompact(lhs.shape_, rhs.shape_, out.shape_,
@@ -317,6 +314,72 @@ void MixedBinaryBroadcastCompute(const nnvm::NodeAttrs& attrs,
#endif
}

#ifndef _WIN32
template<typename xpu, typename OP, typename LOP, typename ROP>
#else
template<typename xpu, typename OP>
#endif
void NumpyBinaryBroadcastCompute(const nnvm::NodeAttrs& attrs,
const OpContext& ctx,
const std::vector<TBlob>& inputs,
const std::vector<OpReqType>& req,
const std::vector<TBlob>& outputs) {
using namespace mshadow;
using namespace mxnet_op;
CHECK_EQ(inputs.size(), 2U);
CHECK_EQ(outputs.size(), 1U);

const TBlob& lhs = inputs[0];
const TBlob& rhs = inputs[1];
const TBlob& out = outputs[0];

if ((out.shape_.Size() == 0U) || (req[0] == kNullOp)) return;

if (lhs.type_flag_ == rhs.type_flag_) {
BinaryBroadcastCompute<xpu, OP>(attrs, ctx, inputs, req, outputs);
return;
}

#ifndef _WIN32
MixedBinaryBroadcastCompute<xpu, OP, LOP, ROP>(attrs, ctx, inputs, req, outputs);
#else
MixedBinaryBroadcastCompute<xpu, OP>(attrs, ctx, inputs, req, outputs);
#endif
}

#ifndef _WIN32
template<typename xpu, typename OP, typename LOP, typename ROP>
#else
template<typename xpu, typename OP>
#endif
void NumpyBinaryBroadcastComputeWithBool(const nnvm::NodeAttrs& attrs,
const OpContext& ctx,
const std::vector<TBlob>& inputs,
const std::vector<OpReqType>& req,
const std::vector<TBlob>& outputs) {
using namespace mshadow;
using namespace mxnet_op;
CHECK_EQ(inputs.size(), 2U);
CHECK_EQ(outputs.size(), 1U);

const TBlob& lhs = inputs[0];
const TBlob& rhs = inputs[1];
const TBlob& out = outputs[0];

if ((out.shape_.Size() == 0U) || (req[0] == kNullOp)) return;

if (lhs.type_flag_ == rhs.type_flag_) {
BinaryBroadcastComputeWithBool<xpu, OP>(attrs, ctx, inputs, req, outputs);
return;
}

#ifndef _WIN32
MixedBinaryBroadcastCompute<xpu, OP, LOP, ROP>(attrs, ctx, inputs, req, outputs);
#else
MixedBinaryBroadcastCompute<xpu, OP>(attrs, ctx, inputs, req, outputs);
#endif
}

template<typename xpu, typename LOP, typename ROP>
void MixedBinaryBackwardUseIn(const nnvm::NodeAttrs& attrs,
const OpContext& ctx,
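
The two new entry points differ only in their homogeneous path: NumpyBinaryBroadcastCompute keeps BinaryBroadcastCompute, while the WithBool variant routes same-dtype inputs through BinaryBroadcastComputeWithBool; mixed dtypes fall through to MixedBinaryBroadcastCompute either way. A standalone sketch of that dispatch (assumption: dtype flags reduced to a small enum):

```cpp
#include <cstdio>

enum DTypeFlag { kFloat32 = 0, kInt32 = 4, kBool = 7 };  // reduced flag set

void HomogeneousWithBool(int dtype) {
  std::printf("homogeneous path, dtype %d\n", dtype);
}
void MixedPrecision(int lhs, int rhs) {
  std::printf("mixed path, dtypes %d and %d\n", lhs, rhs);
}

// Mirrors NumpyBinaryBroadcastComputeWithBool: equal dtypes take the
// bool-capable homogeneous kernel, unequal dtypes the mixed kernels.
void Dispatch(int lhs_dtype, int rhs_dtype) {
  if (lhs_dtype == rhs_dtype) {
    HomogeneousWithBool(lhs_dtype);
    return;
  }
  MixedPrecision(lhs_dtype, rhs_dtype);
}

int main() {
  Dispatch(kBool, kBool);      // bool * bool stays on the fast path
  Dispatch(kInt32, kFloat32);  // mixed precision falls through
}
```
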
9 changes: 5 additions & 4 deletions src/operator/operator_tune-inl.h
@@ -124,7 +124,8 @@ class OperatorTune : public OperatorTuneByType<DType> {
if (!initialized_) {
initialized_ = true;
// Generate some random data for calling the operator kernels
data_set_.reserve(0x100);
data_set_ =
std::unique_ptr<DType[]>(reinterpret_cast<DType*>(new char[0x100 * sizeof(DType)]));
std::random_device rd;
std::mt19937 gen(rd());
if (!std::is_integral<DType>::value) {
@@ -136,7 +137,7 @@ class OperatorTune : public OperatorTuneByType<DType> {
--n;
continue;
}
data_set_.emplace_back(val);
data_set_[n] = val;
}
} else {
std::uniform_int_distribution<> dis(-128, 127);
@@ -147,7 +148,7 @@ class OperatorTune : public OperatorTuneByType<DType> {
--n;
continue;
}
data_set_.emplace_back(val);
data_set_[n] = val;
}
}
// Use this environment variable to generate new tuning statistics
@@ -517,7 +518,7 @@ class OperatorTune : public OperatorTuneByType<DType> {
/*! \brief Number of passes to obtain an average */
static constexpr duration_t OUTSIDE_COUNT = (1 << OUTSIDE_COUNT_SHIFT);
/*! \brief Random data for timing operator calls */
static std::vector<DType> data_set_;
static std::unique_ptr<DType[]> data_set_;
/*! \brief Operators tuned */
static std::unordered_set<std::string> operator_names_;
/*! \brief Arbitrary object to modify in OMP loop */
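
The tuner's random data moves from std::vector<DType> to a raw buffer because instantiating the tuner for DType = bool would hit the bit-packed std::vector<bool> specialization, which cannot hand out a contiguous bool*; allocating new char[] also avoids default-constructing the elements. A small sketch of the distinction (assumption: standalone):

```cpp
#include <memory>
#include <vector>

int main() {
  std::vector<bool> packed(4, true);
  // bool* p = &packed[0];  // ill-formed: operator[] returns a proxy object

  // Mirroring the patch: allocate raw storage and treat it as DType[].
  std::unique_ptr<bool[]> buf(
      reinterpret_cast<bool*>(new char[4 * sizeof(bool)]));
  for (int i = 0; i < 4; ++i) buf[i] = (i % 2 == 0);
  bool* p = buf.get();  // contiguous, element-addressable storage
  return p[0] ? 0 : 1;
}
```
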
10 changes: 5 additions & 5 deletions src/operator/operator_tune.cc
@@ -39,7 +39,7 @@ double OperatorTuneBase::tuning_weight_scale_ = 0.0;
*/
#define IMPLEMENT_OPERATOR_TUNE_STATICS_FOR_TYPE(__typ$) \
template<> bool OperatorTune<__typ$>::initialized_ = false; \
template<> std::vector<__typ$> OperatorTune<__typ$>::data_set_ = {}; \
template<> std::unique_ptr<__typ$[]> OperatorTune<__typ$>::data_set_ = nullptr; \
template<> volatile tune::TuningMode OperatorTuneByType<__typ$>::tuning_mode_ = tune::kAuto; \
template<> volatile int OperatorTune<__typ$>::volatile_int_ = 9; /* arbitrary number */ \
template<> std::unordered_set<std::string> OperatorTune<__typ$>::operator_names_({}); \
@@ -314,10 +314,9 @@ IMPLEMENT_UNARY_WORKLOAD_FWD_WITH_BOOL(mxnet::op::mshadow_op::np_logical_not);
IMPLEMENT_UNARY_WORKLOAD_BWD(mxnet::op::mshadow_op::nt); // NOLINT()
IMPLEMENT_BINARY_WORKLOAD_FWD(mxnet::op::mshadow_op::clip); // NOLINT()
IMPLEMENT_BINARY_WORKLOAD_BWD(mxnet::op::mshadow_op::clip); // NOLINT()
IMPLEMENT_BINARY_WORKLOAD_FWD(mxnet::op::mshadow_op::plus); // NOLINT()
IMPLEMENT_BINARY_WORKLOAD_FWD(mxnet::op::mshadow_op::minus); // NOLINT()
IMPLEMENT_BINARY_WORKLOAD_FWD(mxnet::op::mshadow_op::mul); // NOLINT()
IMPLEMENT_BINARY_WORKLOAD_FWD(mxnet::op::mshadow_op::div); // NOLINT()
IMPLEMENT_BINARY_WORKLOAD_FWD_WITH_BOOL(mxnet::op::mshadow_op::plus); // NOLINT()
IMPLEMENT_BINARY_WORKLOAD_FWD_WITH_BOOL(mxnet::op::mshadow_op::minus); // NOLINT()
IMPLEMENT_BINARY_WORKLOAD_FWD_WITH_BOOL(mxnet::op::mshadow_op::mul); // NOLINT()
IMPLEMENT_BINARY_WORKLOAD_FWD_WITH_BOOL(mxnet::op::mshadow_op::div); // NOLINT()
IMPLEMENT_BINARY_WORKLOAD_FWD(mxnet::op::mshadow_op::true_divide); // NOLINT()
IMPLEMENT_BINARY_WORKLOAD_FWD(mxnet::op::mshadow_op::minus_sign); // NOLINT()
IMPLEMENT_BINARY_WORKLOAD_FWD(mxnet::op::mshadow_op::rminus); // NOLINT()
32 changes: 32 additions & 0 deletions src/operator/tensor/elemwise_binary_broadcast_op.h
@@ -347,6 +347,9 @@ void BinaryBroadcastCompute(const nnvm::NodeAttrs& attrs,
} else {
if (req[0] != kNullOp) {
mshadow::Stream<xpu> *s = ctx.get_stream<xpu>();
if (outputs[0].type_flag_ == mshadow::kBool) {
LOG(FATAL) << "Operator " << attrs.op->name << " does not support boolean type";
}
MSHADOW_TYPE_SWITCH(outputs[0].type_flag_, DType, {
BROADCAST_NDIM_SWITCH(ndim, NDim, {
mshadow::Shape<NDim> oshape = new_oshape.get<NDim>();
@@ -361,6 +364,35 @@
}
}

template<typename xpu, typename OP>
void BinaryBroadcastComputeWithBool(const nnvm::NodeAttrs& attrs,
const OpContext& ctx,
const std::vector<TBlob>& inputs,
const std::vector<OpReqType>& req,
const std::vector<TBlob>& outputs) {
if (outputs[0].shape_.Size() == 0U) return;
mxnet::TShape new_lshape, new_rshape, new_oshape;
int ndim = BinaryBroadcastShapeCompact(inputs[0].shape_, inputs[1].shape_, outputs[0].shape_,
&new_lshape, &new_rshape, &new_oshape);
if (!ndim) {
ElemwiseBinaryOp::ComputeWithBool<xpu, OP>(attrs, ctx, inputs, req, outputs);
} else {
if (req[0] != kNullOp) {
mshadow::Stream<xpu> *s = ctx.get_stream<xpu>();
MSHADOW_TYPE_SWITCH_WITH_BOOL(outputs[0].type_flag_, DType, {
BROADCAST_NDIM_SWITCH(ndim, NDim, {
mshadow::Shape<NDim> oshape = new_oshape.get<NDim>();
mshadow::Shape<NDim> lstride = mxnet_op::calc_stride(new_lshape.get<NDim>());
mshadow::Shape<NDim> rstride = mxnet_op::calc_stride(new_rshape.get<NDim>());
mxnet_op::Kernel<mxnet_op::binary_broadcast_kernel<NDim, OP>, xpu>::
template LaunchEx(s, new_oshape.Size(), req[0], lstride, rstride, oshape,
inputs[0].dptr<DType>(), inputs[1].dptr<DType>(), outputs[0].dptr<DType>());
});
});
}
}
}

template<typename xpu, typename OP>
void BinaryBroadcastComputeLogic(const nnvm::NodeAttrs& attrs,
const OpContext& ctx,
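
BinaryBroadcastComputeWithBool is a near-copy of BinaryBroadcastCompute whose functional change is swapping MSHADOW_TYPE_SWITCH for MSHADOW_TYPE_SWITCH_WITH_BOOL, so the kernel is also instantiated for kBool (the non-bool entry point now fails fast with a fatal log instead). A reduced sketch of how such a type-switch macro drives instantiation (assumptions: standalone, two dtype cases only):

```cpp
#include <cstdio>

enum TypeFlag { kFloat32 = 0, kBool = 7 };  // reduced flag set

#define TYPE_SWITCH_WITH_BOOL(type, DType, ...)                      \
  switch (type) {                                                    \
    case kFloat32: { typedef float DType; { __VA_ARGS__ } } break;   \
    case kBool:    { typedef bool DType;  { __VA_ARGS__ } } break;   \
    default: std::printf("unsupported dtype %d\n", type); break;     \
  }

template <typename DType>
void LaunchMul(DType a, DType b) {  // stand-in for the broadcast kernel
  std::printf("result: %d\n", static_cast<int>(a * b));
}

int main() {
  int flag = kBool;
  TYPE_SWITCH_WITH_BOOL(flag, DType, {
    LaunchMul<DType>(static_cast<DType>(1), static_cast<DType>(1));
  });
}
```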