From 4a864d5d0199b6393fbaa5a84a2a288daa0e47ba Mon Sep 17 00:00:00 2001 From: Hetzel Date: Mon, 15 Jan 2018 23:11:45 +0100 Subject: [PATCH] enabling multithreading in broadcast_reduce --- src/operator/tensor/broadcast_reduce-inl.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/operator/tensor/broadcast_reduce-inl.h b/src/operator/tensor/broadcast_reduce-inl.h index 1bfe68a771d5..7f3e5685a086 100644 --- a/src/operator/tensor/broadcast_reduce-inl.h +++ b/src/operator/tensor/broadcast_reduce-inl.h @@ -197,6 +197,7 @@ void seq_reduce_compute(const int N, const int M, const bool addto, const DType *big, DType *small, const Shape bshape, const Shape sshape, const Shape rshape, const Shape rstride) { + #pragma omp parallel for num_threads(engine::OpenMP::Get()->GetRecommendedOMPThreadCount()) for (int idx = 0; idx < N; ++idx) { seq_reduce_assign(idx, M, addto, big, small, bshape, sshape, rshape, rstride); @@ -266,6 +267,7 @@ void seq_reduce_compute(const int N, const int M, const bool addto, const Shape lhs_shape, const Shape lhs_stride, const Shape rhs_shape, const Shape rhs_stride, const Shape& lhs_shape0, const Shape& rhs_shape0) { + #pragma omp parallel for num_threads(engine::OpenMP::Get()->GetRecommendedOMPThreadCount()) for (int idx = 0; idx < N; ++idx) { seq_reduce_assign(idx, M, addto, big, lhs, rhs, small, big_shape, lhs_shape0, rhs_shape0, small_shape, rshape, lhs_shape, rhs_shape, rstride,