diff --git a/src/operator/mshadow_op.h b/src/operator/mshadow_op.h
index 4cbb17de9417..b84e39f96cc2 100644
--- a/src/operator/mshadow_op.h
+++ b/src/operator/mshadow_op.h
@@ -114,8 +114,10 @@ using std::is_integral;
 
 #define MXNET_BINARY_LOGIC_OP_NC(name, expr) \
   struct name : public mxnet_op::tunable { \
-    template<typename DType> \
-    MSHADOW_XINLINE static bool Map(DType a, DType b) { \
+    template<typename DType, typename EType> \
+    MSHADOW_XINLINE static bool Map(DType lhs, EType rhs) { \
+      long double a = static_cast<long double>(lhs); \
+      long double b = static_cast<long double>(rhs); \
       return (expr); \
     } \
   }
diff --git a/src/operator/mxnet_op.h b/src/operator/mxnet_op.h
index 3f1c8046cf85..bc8c0afcf1a2 100644
--- a/src/operator/mxnet_op.h
+++ b/src/operator/mxnet_op.h
@@ -860,6 +860,13 @@ struct op_with_req {
     KERNEL_ASSIGN(out[i], req, OP::Map(in[i], value));
   }
 
+  /*! \brief inputs are two tensors with different types and a boolean output tensor */
+  template<typename LType, typename RType,
+           typename std::enable_if<!std::is_same<LType, RType>::value, int>::type = 0>
+  MSHADOW_XINLINE static void Map(index_t i, bool *out, const LType *lhs, const RType *rhs) {
+    KERNEL_ASSIGN(out[i], req, OP::Map(lhs[i], rhs[i]));
+  }
+
 #ifndef _WIN32
   /*! \brief inputs are two tensors with a half_t output tensor */
   template<typename DType,
@@ ... @@
   CHECK_EQ(in_attrs->size(), 2U);
   CHECK_EQ(out_attrs->size(), 1U);
   if (in_attrs->at(0) == -1 && in_attrs->at(1) == -1) return false;
-  TYPE_ASSIGN_CHECK(*in_attrs, 0, in_attrs->at(1));
-  TYPE_ASSIGN_CHECK(*in_attrs, 1, in_attrs->at(0));
   TYPE_ASSIGN_CHECK(*out_attrs, 0, mshadow::kBool);
   return true;
 }
diff --git a/src/operator/tensor/elemwise_binary_broadcast_op.h b/src/operator/tensor/elemwise_binary_broadcast_op.h
index ffd0f123070a..6f6711e9f881 100644
--- a/src/operator/tensor/elemwise_binary_broadcast_op.h
+++ b/src/operator/tensor/elemwise_binary_broadcast_op.h
@@ -208,6 +208,25 @@ struct binary_broadcast_kernel {
     }
   }
 
+  /*! \brief Map function for binary_broadcast_kernel */
+  template<typename LType, typename RType, typename OType>
+  MSHADOW_XINLINE static void Map(index_t base, index_t length, OpReqType req,
+                                  const Shape<ndim> &lstride, const Shape<ndim> &rstride,
+                                  const Shape<ndim> &oshape, LType *lhs, RType *rhs,
+                                  OType *out) {
+    Shape<ndim> coord = unravel(base, oshape);
+    auto lidx = static_cast<index_t>(dot(coord, lstride));
+    auto ridx = static_cast<index_t>(dot(coord, rstride));
+    KERNEL_ASSIGN(out[base], req, OP::Map(lhs[lidx], rhs[ridx]));
+    // starts from 1 to avoid extra inc at end of loop
+    for (index_t i = 1; i < length; ++i) {
+      inc(&coord, oshape, &lidx, lstride, &ridx, rstride);
+      // When tuning, don't actually run the op, since it's not going to be tuned against
+      // the actual op we'll eventually be using
+      KERNEL_ASSIGN(out[base + i], req, OP::Map(lhs[lidx], rhs[ridx]));
+    }
+  }
+
   /*! \brief Map function for binary_broadcast_kernel */
   template<typename IType, typename DType>
   MSHADOW_XINLINE static void Map(index_t base, index_t length, OpReqType req,
@@ -430,23 +449,28 @@ void BinaryBroadcastComputeLogic(const nnvm::NodeAttrs& attrs,
                                  const std::vector<TBlob>& outputs) {
   if (outputs[0].shape_.Size() == 0U) return;
   mxnet::TShape new_lshape, new_rshape, new_oshape;
-  int ndim = BinaryBroadcastShapeCompact(inputs[0].shape_, inputs[1].shape_, outputs[0].shape_,
+  const TBlob& lhs = inputs[0];
+  const TBlob& rhs = inputs[1];
+  const TBlob& out = outputs[0];
+  int ndim = BinaryBroadcastShapeCompact(lhs.shape_, rhs.shape_, out.shape_,
                                          &new_lshape, &new_rshape, &new_oshape);
   if (!ndim) {
     ElemwiseBinaryOp::ComputeLogic<xpu, OP>(attrs, ctx, inputs, req, outputs);
   } else {
     if (req[0] == kNullOp) return;
     mshadow::Stream<xpu> *s = ctx.get_stream<xpu>();
-    MSHADOW_TYPE_SWITCH_WITH_BOOL(inputs[0].type_flag_, DType, {
-      BROADCAST_NDIM_SWITCH(ndim, NDim, {
+    MSHADOW_TYPE_SWITCH_WITH_BOOL(lhs.type_flag_, DType, {
+      MSHADOW_TYPE_SWITCH_WITH_BOOL(rhs.type_flag_, EType, {
+        BROADCAST_NDIM_SWITCH(ndim, NDim, {
          mshadow::Shape<NDim> oshape = new_oshape.get<NDim>();
          mshadow::Shape<NDim> lstride = mxnet_op::calc_stride(new_lshape.get<NDim>());
          mshadow::Shape<NDim> rstride = mxnet_op::calc_stride(new_rshape.get<NDim>());
          mxnet_op::Kernel<mxnet_op::binary_broadcast_kernel<NDim, OP>, xpu>::
          template LaunchEx(s, new_oshape.Size(), req[0], lstride, rstride, oshape,
-                           inputs[0].dptr<DType>(), inputs[1].dptr<DType>(),
-                           outputs[0].dptr<bool>());
+                           lhs.dptr<DType>(), rhs.dptr<EType>(),
+                           out.dptr<bool>());
         });
+      });
     });
   }
 }
diff --git a/src/operator/tensor/elemwise_binary_op.h b/src/operator/tensor/elemwise_binary_op.h
index 01dca2e76ccc..c080570afab9 100644
--- a/src/operator/tensor/elemwise_binary_op.h
+++ b/src/operator/tensor/elemwise_binary_op.h
@@ -620,14 +620,16 @@ template<typename xpu, typename OP>
   CHECK_EQ(outputs.size(), 1U);
   MXNET_ASSIGN_REQ_SWITCH(req[0], Req, {
     MSHADOW_TYPE_SWITCH_WITH_BOOL(inputs[0].type_flag_, DType, {
+      MSHADOW_TYPE_SWITCH_WITH_BOOL(inputs[1].type_flag_, EType, {
       const size_t size = (minthree(outputs[0].Size(), inputs[0].Size(), inputs[1].Size())
                            + DataType<DType>::kLanes - 1) / DataType<DType>::kLanes;
       if (size != 0) {
         Kernel<mxnet_op::op_with_req<OP, Req>, xpu>::Launch(s, size,
                                                             outputs[0].dptr<bool>(),
                                                             inputs[0].dptr<DType>(),
-                                                            inputs[1].dptr<DType>());
+                                                            inputs[1].dptr<EType>());
       }
+      });
     });
   });
 }
diff --git a/tests/python/unittest/test_numpy_op.py b/tests/python/unittest/test_numpy_op.py
index fca7c71f4b57..22058bae9bca 100644
--- a/tests/python/unittest/test_numpy_op.py
+++ b/tests/python/unittest/test_numpy_op.py
@@ -2627,6 +2627,15 @@ def hybrid_forward(self, F, a, b, *args, **kwargs):
         'mod': (1.0, 5.0, None, None),
         'power': (1.0, 3.0, lambda y, x1, x2: _np.power(x1, x2 - 1.0) * x2,
                   lambda y, x1, x2: _np.power(x1, x2) * _np.log(x1)),
+        'equal': (0.0, 2.0, None, None),
+        'not_equal': (0.0, 2.0, None, None),
+        'greater': (0.0, 2.0, None, None),
+        'less': (0.0, 2.0, None, None),
+        'greater_equal': (0.0, 2.0, None, None),
+        'less_equal': (0.0, 2.0, None, None),
+        'logical_and': (0.0, 2.0, None, None),
+        'logical_or': (0.0, 2.0, None, None),
+        'logical_xor': (0.0, 2.0, None, None),
     }
 
     shape_pairs = [((3, 2), (3, 2)),
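The heart of the mshadow_op.h change is that MXNET_BINARY_LOGIC_OP_NC now takes two independent input types and widens both operands to long double before evaluating the comparison expression, so mixed-type inputs (for example int64 against float32) are compared in a common type; the nested MSHADOW_TYPE_SWITCH_WITH_BOOL blocks in the compute paths are what feed the two distinct element types into that functor. Below is a small standalone sketch of the widening idea only; greater_sketch and main are illustrative names, not code from this patch.

    // Standalone sketch (not MXNet code) of the widening used by
    // MXNET_BINARY_LOGIC_OP_NC: cast both operands to long double,
    // then evaluate the comparison expression once.
    #include <cstdint>
    #include <iostream>

    struct greater_sketch {  // hypothetical stand-in for a generated logic functor
      template <typename DType, typename EType>
      static bool Map(DType lhs, EType rhs) {
        long double a = static_cast<long double>(lhs);
        long double b = static_cast<long double>(rhs);
        return a > b;  // plays the role of the macro's (expr)
      }
    };

    int main() {
      // 2^40 + 1 is exactly representable in (long) double, so the mixed
      // int64/float comparison stays exact after widening.
      int64_t i = (int64_t{1} << 40) + 1;
      float f = static_cast<float>(int64_t{1} << 40);
      std::cout << std::boolalpha
                << greater_sketch::Map(i, f) << '\n'   // true
                << greater_sketch::Map(f, i) << '\n';  // false
    }

This is consistent with the type-inference hunk above, which stops forcing the two input dtypes to match and only pins the output to mshadow::kBool.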