Skip to content

Commit

Permalink
enabling multithreading in broadcast_reduce (apache#9444)
Browse files (browse the repository at this point in the history)
  • Loading branch information
asmushetzel authored and Pedro Larroy committed Jan 18, 2018
1 parent 95b1d1a commit 565576a
Showing 1 changed file with 2 additions and 0 deletions.
2 changes: 2 additions & 0 deletions src/operator/tensor/broadcast_reduce-inl.h
Original file line number | Diff line number | Diff line change
Expand Up
@@ -197,6 +197,7 @@ void seq_reduce_compute(const int N, const int M, const bool addto,
const DType *big, DType *small, const Shape<ndim> bshape,
const Shape<ndim> sshape, const Shape<ndim> rshape,
const Shape<ndim> rstride) {
#pragma omp parallel for num_threads(engine::OpenMP::Get()->GetRecommendedOMPThreadCount())
for (int idx = 0; idx < N; ++idx) {
seq_reduce_assign<Reducer, ndim, DType, OP>(idx, M, addto, big, small, bshape, sshape, rshape,
rstride);
Expand Down
Expand Up
@@ -266,6 +267,7 @@ void seq_reduce_compute(const int N, const int M, const bool addto,
const Shape<ndim> lhs_shape, const Shape<ndim> lhs_stride,
const Shape<ndim> rhs_shape, const Shape<ndim> rhs_stride,
const Shape<ndim>& lhs_shape0, const Shape<ndim>& rhs_shape0) {
#pragma omp parallel for num_threads(engine::OpenMP::Get()->GetRecommendedOMPThreadCount())
for (int idx = 0; idx < N; ++idx) {
seq_reduce_assign<Reducer, ndim, DType, OP1, OP2>(idx, M, addto, big, lhs, rhs, small,
big_shape, lhs_shape0, rhs_shape0, small_shape, rshape, lhs_shape, rhs_shape, rstride,
Expand Down

0 comments on commit 565576a

Please sign in to comment.