A few fixes #163

Merged: 2 commits, Aug 11, 2017
4 changes: 2 additions & 2 deletions src/operator/batch_norm.cu
@@ -283,7 +283,7 @@ __global__ void BatchNormalizationUpdateOutputKernel(
}

// Write normalized and update the output
- const AccReal gamma = weight.numElements() > 0
+ const AccReal gamma = ((flags & FIX_GAMMA_FLAG) == 0 && weight.numElements() > 0)
? ScalarConvert<DType, AccReal>::to(weight[plane])
: ScalarConvert<int, AccReal>::to(1);
const AccReal beta = bias.numElements() > 0 ? ScalarConvert<DType, AccReal>::to(bias[plane])
@@ -332,7 +332,7 @@ static __global__ void BatchNormalizationBackwardKernel(
invstd = VARIANCE_TO_INVSTD(tensors.runningVar[plane], eps);
}

- const AccReal weightVal = tensors.weight.numElements() > 0 ?
+ const AccReal weightVal = ((flags & FIX_GAMMA_FLAG) == 0 && tensors.weight.numElements() > 0) ?
ScalarConvert<DType, AccReal>::to(tensors.weight[plane]) : AccReal(1);
const AccReal norm = AccReal(1) / N;
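The two kernel changes above make the GPU forward and backward paths honor FIX_GAMMA_FLAG: when the flag is set, the scale is treated as a constant 1 instead of being read from the weight tensor. A minimal NumPy sketch of what the forward computation then does (illustrative only, not MXNet code):

```python
import numpy as np

def batchnorm_forward(x, gamma, beta, eps=1e-3, fix_gamma=True):
    # Normalize each feature over the batch axis (axis 0).
    mean = x.mean(axis=0)
    var = x.var(axis=0)
    x_hat = (x - mean) / np.sqrt(var + eps)
    # With fix_gamma set, the scale is pinned to 1, which is what the
    # FIX_GAMMA_FLAG check added to the CUDA kernels selects.
    scale = np.ones_like(gamma) if fix_gamma else gamma
    return scale * x_hat + beta

x = np.random.randn(4, 3)
y = batchnorm_forward(x, gamma=np.ones(3), beta=np.zeros(3), fix_gamma=True)
```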

60 changes: 40 additions & 20 deletions src/operator/random/sample_op.cu
@@ -28,21 +28,20 @@ namespace op {

// GPU versions of uniform and normal distribution.
template<>
- void SampleUniform_<gpu>(const nnvm::NodeAttrs& attrs,
- const OpContext& ctx,
- const std::vector<TBlob>& inputs,
- const std::vector<OpReqType>& req,
- const std::vector<TBlob>& outputs) {
+ void SampleUniformDnsImpl<gpu>(const nnvm::NodeAttrs& attrs,
+ const OpContext& ctx,
+ const OpReqType& req,
+ TBlob* output) {
using namespace mxnet::op;
using namespace mshadow::expr;
typedef gpu xpu;
mshadow::Stream<xpu> *s = ctx.get_stream<xpu>();
const SampleUniformParam& param = nnvm::get<SampleUniformParam>(attrs.parsed);
mshadow::Random<xpu, float> *prnd = ctx.requested[0].get_random<xpu, float>(s);
- if (outputs[0].type_flag_ != mshadow::kFloat32) {
- MSHADOW_REAL_TYPE_SWITCH(outputs[0].type_flag_, DType, {
+ if (output->type_flag_ != mshadow::kFloat32) {
+ MSHADOW_REAL_TYPE_SWITCH(output->type_flag_, DType, {
// Not float32: use workspace and copy to output
- mshadow::Tensor<xpu, 2, DType> out = outputs[0].FlatTo2D<xpu, DType>(s);
+ mshadow::Tensor<xpu, 2, DType> out = output->FlatTo2D<xpu, DType>(s);
mshadow::Tensor<xpu, 1, float> workspace =
ctx.requested[1].get_space_typed<xpu, 1, float>
(mshadow::Shape1(out.shape_.Size()), s);
@@ -51,27 +50,36 @@ void SampleUniform_<gpu>(const nnvm::NodeAttrs& attrs,
});
} else {
// float32: write directly into output
- mshadow::Tensor<xpu, 2, float> out = outputs[0].FlatTo2D<xpu, float>(s);
+ mshadow::Tensor<xpu, 2, float> out = output->FlatTo2D<xpu, float>(s);
prnd->SampleUniform(&out, param.low, param.high);
}
}

template<>
- void SampleNormal_<gpu>(const nnvm::NodeAttrs& attrs,
- const OpContext& ctx,
- const std::vector<TBlob>& inputs,
- const std::vector<OpReqType>& req,
- const std::vector<TBlob>& outputs) {
+ void SampleUniform_<gpu>(const nnvm::NodeAttrs& attrs,
+ const OpContext& ctx,
+ const std::vector<TBlob>& inputs,
+ const std::vector<OpReqType>& req,
+ const std::vector<TBlob>& outputs) {
+ TBlob out = outputs[0];
+ SampleUniformDnsImpl<gpu>(attrs, ctx, req[0], &out);
+ }
+
+ template<>
+ void SampleNormalDnsImpl<gpu>(const nnvm::NodeAttrs& attrs,
+ const OpContext& ctx,
+ const OpReqType& req,
+ TBlob* output) {
using namespace mxnet::op;
using namespace mshadow::expr;
typedef gpu xpu;
mshadow::Stream<xpu> *s = ctx.get_stream<xpu>();
const SampleNormalParam& param = nnvm::get<SampleNormalParam>(attrs.parsed);
mshadow::Random<xpu, float> *prnd = ctx.requested[0].get_random<xpu, float>(s);
- if (outputs[0].type_flag_ != mshadow::kFloat32) {
- MSHADOW_REAL_TYPE_SWITCH(outputs[0].type_flag_, DType, {
+ if (output->type_flag_ != mshadow::kFloat32) {
+ MSHADOW_REAL_TYPE_SWITCH(output->type_flag_, DType, {
// Not float32: use workspace and copy to output
- mshadow::Tensor<xpu, 2, DType> out = outputs[0].FlatTo2D<xpu, DType>(s);
+ mshadow::Tensor<xpu, 2, DType> out = output->FlatTo2D<xpu, DType>(s);
mshadow::Tensor<xpu, 1, float> workspace =
ctx.requested[1].get_space_typed<xpu, 1, float>
(mshadow::Shape1(out.shape_.Size()), s);
@@ -80,16 +88,28 @@ void SampleNormal_<gpu>(const nnvm::NodeAttrs& attrs,
});
} else {
// float32: write directly into output
- mshadow::Tensor<xpu, 2, float> out = outputs[0].FlatTo2D<xpu, float>(s);
+ mshadow::Tensor<xpu, 2, float> out = output->FlatTo2D<xpu, float>(s);
prnd->SampleGaussian(&out, param.loc, param.scale);
}
}

+ template<>
+ void SampleNormal_<gpu>(const nnvm::NodeAttrs& attrs,
+ const OpContext& ctx,
+ const std::vector<TBlob>& inputs,
+ const std::vector<OpReqType>& req,
+ const std::vector<TBlob>& outputs) {
+ TBlob out = outputs[0];
+ SampleNormalDnsImpl<gpu>(attrs, ctx, req[0], &out);
+ }
+
NNVM_REGISTER_OP(random_uniform)
.set_attr<FCompute>("FCompute<gpu>", SampleUniform_<gpu>);
.set_attr<FCompute>("FCompute<gpu>", SampleUniform_<gpu>)
.set_attr<FComputeEx>("FComputeEx<gpu>", SampleUniformEx_<gpu>);

NNVM_REGISTER_OP(random_normal)
.set_attr<FCompute>("FCompute<gpu>", SampleNormal_<gpu>);
.set_attr<FCompute>("FCompute<gpu>", SampleNormal_<gpu>)
.set_attr<FComputeEx>("FComputeEx<gpu>", SampleNormalEx_<gpu>);

} // namespace op
} // namespace mxnet
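The refactor in this file follows one pattern: the sampling bodies move into SampleUniformDnsImpl / SampleNormalDnsImpl, which write into a single dense TBlob, and the original FCompute entry points become thin wrappers around them; the newly registered FComputeEx handlers (SampleUniformEx_ / SampleNormalEx_, not shown in this diff) can then reuse the same dense code. A rough Python analogue of that structure (illustrative only, not the MXNet code):

```python
import numpy as np

def sample_uniform_dns_impl(low, high, out):
    # Shared dense implementation: fills one pre-allocated buffer in place.
    out[...] = np.random.uniform(low, high, size=out.shape)

def sample_uniform(low, high, outputs):
    # Dense entry point (the role of the FCompute wrapper): forward the
    # first output buffer to the shared implementation.
    sample_uniform_dns_impl(low, high, outputs[0])

def sample_uniform_ex(low, high, output, storage_type):
    # Storage-aware entry point (the role of FComputeEx): dispatch on the
    # storage type but reuse the same dense implementation for the values.
    if storage_type == 'default':
        sample_uniform_dns_impl(low, high, output)
    else:
        raise NotImplementedError('sparse outputs are handled by the real Ex op')

buf = np.empty((2, 3))
sample_uniform(0.0, 1.0, [buf])   # fills buf with uniform samples
```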
3 changes: 1 addition & 2 deletions tests/python/unittest/test_operator.py
@@ -867,7 +867,6 @@ def check_batchnorm_training(stype):
rolling_mean = np.random.uniform(size=s)
rolling_std = np.random.uniform(size=s)

- stype = 'row_sparse'
data = mx.symbol.Variable('data', stype=stype)
in_location = [mx.nd.array(data_tmp).tostype(stype), mx.nd.array(gamma).tostype(stype),
mx.nd.array(beta).tostype(stype)]
@@ -935,7 +934,7 @@ def check_batchnorm_training(stype):
test = mx.symbol.BatchNorm(data, fix_gamma=False, use_global_stats=True, axis=chaxis)
check_numeric_gradient(test, in_location, xmean_std, numeric_eps=1e-2, rtol=0.2, atol=0.01)

- stypes = ['row_sparse', 'csr', 'default']

Review comment: no longer supporting csr?

Reply (PR author): all aux data are 1-D, so they can't be converted to csr (see the sketch after this hunk).

+ stypes = ['row_sparse', 'default']
for stype in stypes:
check_batchnorm_training(stype)
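On the review exchange above: BatchNorm's gamma, beta, and moving mean/variance are all 1-D, and MXNet's csr storage only represents 2-D arrays, so the test now exercises only 'row_sparse' and 'default'. A minimal sketch of that constraint (assuming, as the reply indicates, that converting a 1-D NDArray to csr is rejected):

```python
import mxnet as mx

gamma = mx.nd.ones((3,))           # BatchNorm aux/parameter arrays are 1-D
rsp = gamma.tostype('row_sparse')  # the conversion the updated test relies on
try:
    gamma.tostype('csr')           # csr is 2-D only, so this should fail
except Exception as err:
    print('csr conversion rejected:', err)
```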

4 changes: 4 additions & 0 deletions tests/python/unittest/test_sparse_ndarray.py
@@ -352,6 +352,10 @@ def test_sparse_nd_output_fallback():
assert(np.sum(out.asnumpy()) != 0)

def test_sparse_nd_random():
""" test sparse random operator on cpu """
# gpu random operator doesn't use fixed seed
if default_context().device_type is 'gpu':
return
shape = (100, 100)
fns = [mx.nd.random_uniform, mx.nd.random_normal, mx.nd.random_gamma]
for fn in fns: