From 7335267e7991d5eacef3d445968108a95d0f800a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Malte=20F=C3=B6rster?= <97973773+mfoerste4@users.noreply.github.com> Date: Sat, 16 Mar 2024 05:25:48 +0100 Subject: [PATCH] Fix illegal acces mean/stdev, sum add Kahan Summation (#2223) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This PR addresses #2204 and #2205. * fixes illegal access / test coverage for mean row-wise kernel * fixes illegal access / test coverage for stdev row-wise kernel * modified sum kernels to utilize Kahan/Neumaier summation per thread, also increase load per thread to benefit from this FYI, @tfeher Authors: - Malte Förster (https://github.com/mfoerste4) Approvers: - Tamas Bela Feher (https://github.com/tfeher) URL: https://github.com/rapidsai/raft/pull/2223 --- cpp/include/raft/stats/detail/mean.cuh | 4 +- cpp/include/raft/stats/detail/stddev.cuh | 4 +- cpp/include/raft/stats/detail/sum.cuh | 78 +++++++++++++++++---- cpp/test/stats/mean.cu | 66 +++++++++--------- cpp/test/stats/minmax.cu | 66 +++++++----------- cpp/test/stats/stddev.cu | 55 ++++++++++++--- cpp/test/stats/sum.cu | 89 +++++++++++++++++------- 7 files changed, 236 insertions(+), 126 deletions(-) diff --git a/cpp/include/raft/stats/detail/mean.cuh b/cpp/include/raft/stats/detail/mean.cuh index cf4dbc7aa3..6c330acb26 100644 --- a/cpp/include/raft/stats/detail/mean.cuh +++ b/cpp/include/raft/stats/detail/mean.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -43,7 +43,7 @@ RAFT_KERNEL meanKernelRowMajor(Type* mu, const Type* data, IdxType D, IdxType N) __syncthreads(); raft::myAtomicAdd(smu + thisColId, thread_data); __syncthreads(); - if (threadIdx.x < ColsPerBlk) raft::myAtomicAdd(mu + colId, smu[thisColId]); + if (threadIdx.x < ColsPerBlk && colId < D) raft::myAtomicAdd(mu + colId, smu[thisColId]); } template diff --git a/cpp/include/raft/stats/detail/stddev.cuh b/cpp/include/raft/stats/detail/stddev.cuh index acee4a944e..bc2644a233 100644 --- a/cpp/include/raft/stats/detail/stddev.cuh +++ b/cpp/include/raft/stats/detail/stddev.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -45,7 +45,7 @@ RAFT_KERNEL stddevKernelRowMajor(Type* std, const Type* data, IdxType D, IdxType __syncthreads(); raft::myAtomicAdd(sstd + thisColId, thread_data); __syncthreads(); - if (threadIdx.x < ColsPerBlk) raft::myAtomicAdd(std + colId, sstd[thisColId]); + if (threadIdx.x < ColsPerBlk && colId < D) raft::myAtomicAdd(std + colId, sstd[thisColId]); } template diff --git a/cpp/include/raft/stats/detail/sum.cuh b/cpp/include/raft/stats/detail/sum.cuh index bb45eb50f4..4f85536e6c 100644 --- a/cpp/include/raft/stats/detail/sum.cuh +++ b/cpp/include/raft/stats/detail/sum.cuh @@ -34,30 +34,72 @@ RAFT_KERNEL sumKernelRowMajor(Type* mu, const Type* data, IdxType D, IdxType N) IdxType thisRowId = threadIdx.x / ColsPerBlk; IdxType colId = thisColId + ((IdxType)blockIdx.y * ColsPerBlk); IdxType rowId = thisRowId + ((IdxType)blockIdx.x * RowsPerBlkPerIter); - Type thread_data = Type(0); + Type thread_sum = Type(0); const IdxType stride = RowsPerBlkPerIter * gridDim.x; - for (IdxType i = rowId; i < N; i += stride) - thread_data += (colId < D) ? data[i * D + colId] : Type(0); + for (IdxType i = rowId; i < N; i += stride) { + thread_sum += (colId < D) ? data[i * D + colId] : Type(0); + } __shared__ Type smu[ColsPerBlk]; if (threadIdx.x < ColsPerBlk) smu[threadIdx.x] = Type(0); __syncthreads(); - raft::myAtomicAdd(smu + thisColId, thread_data); + raft::myAtomicAdd(smu + thisColId, thread_sum); + __syncthreads(); + if (threadIdx.x < ColsPerBlk && colId < D) raft::myAtomicAdd(mu + colId, smu[thisColId]); +} + +template +RAFT_KERNEL sumKahanKernelRowMajor(Type* mu, const Type* data, IdxType D, IdxType N) +{ + constexpr int RowsPerBlkPerIter = TPB / ColsPerBlk; + IdxType thisColId = threadIdx.x % ColsPerBlk; + IdxType thisRowId = threadIdx.x / ColsPerBlk; + IdxType colId = thisColId + ((IdxType)blockIdx.y * ColsPerBlk); + IdxType rowId = thisRowId + ((IdxType)blockIdx.x * RowsPerBlkPerIter); + Type thread_sum = Type(0); + Type thread_c = Type(0); + const IdxType stride = RowsPerBlkPerIter * gridDim.x; + for (IdxType i = rowId; i < N; i += stride) { + // KahanBabushkaNeumaierSum + const Type cur_value = (colId < D) ? data[i * D + colId] : Type(0); + const Type t = thread_sum + cur_value; + if (abs(thread_sum) >= abs(cur_value)) { + thread_c += (thread_sum - t) + cur_value; + } else { + thread_c += (cur_value - t) + thread_sum; + } + thread_sum = t; + } + thread_sum += thread_c; + __shared__ Type smu[ColsPerBlk]; + if (threadIdx.x < ColsPerBlk) smu[threadIdx.x] = Type(0); + __syncthreads(); + raft::myAtomicAdd(smu + thisColId, thread_sum); __syncthreads(); if (threadIdx.x < ColsPerBlk && colId < D) raft::myAtomicAdd(mu + colId, smu[thisColId]); } template -RAFT_KERNEL sumKernelColMajor(Type* mu, const Type* data, IdxType D, IdxType N) +RAFT_KERNEL sumKahanKernelColMajor(Type* mu, const Type* data, IdxType D, IdxType N) { typedef cub::BlockReduce BlockReduce; __shared__ typename BlockReduce::TempStorage temp_storage; - Type thread_data = Type(0); + Type thread_sum = Type(0); + Type thread_c = Type(0); IdxType colStart = N * blockIdx.x; for (IdxType i = threadIdx.x; i < N; i += TPB) { - IdxType idx = colStart + i; - thread_data += data[idx]; + // KahanBabushkaNeumaierSum + IdxType idx = colStart + i; + const Type cur_value = data[idx]; + const Type t = thread_sum + cur_value; + if (abs(thread_sum) >= abs(cur_value)) { + thread_c += (thread_sum - t) + cur_value; + } else { + thread_c += (cur_value - t) + thread_sum; + } + thread_sum = t; } - Type acc = BlockReduce(temp_storage).Sum(thread_data); + thread_sum += thread_c; + Type acc = BlockReduce(temp_storage).Sum(thread_sum); if (threadIdx.x == 0) { mu[blockIdx.x] = acc; } } @@ -66,15 +108,21 @@ void sum(Type* output, const Type* input, IdxType D, IdxType N, bool rowMajor, c { static const int TPB = 256; if (rowMajor) { - static const int RowsPerThread = 4; - static const int ColsPerBlk = 32; - static const int RowsPerBlk = (TPB / ColsPerBlk) * RowsPerThread; - dim3 grid(raft::ceildiv(N, (IdxType)RowsPerBlk), raft::ceildiv(D, (IdxType)ColsPerBlk)); + static const int ColsPerBlk = 8; + static const int MinRowsPerThread = 16; + static const int MinRowsPerBlk = (TPB / ColsPerBlk) * MinRowsPerThread; + static const int MaxBlocksDimX = 8192; + + const IdxType grid_y = raft::ceildiv(D, (IdxType)ColsPerBlk); + const IdxType grid_x = + raft::min((IdxType)MaxBlocksDimX, raft::ceildiv(N, (IdxType)MinRowsPerBlk)); + + dim3 grid(grid_x, grid_y); RAFT_CUDA_TRY(cudaMemset(output, 0, sizeof(Type) * D)); - sumKernelRowMajor + sumKahanKernelRowMajor <<>>(output, input, D, N); } else { - sumKernelColMajor<<>>(output, input, D, N); + sumKahanKernelColMajor<<>>(output, input, D, N); } RAFT_CUDA_TRY(cudaPeekAtLastError()); } diff --git a/cpp/test/stats/mean.cu b/cpp/test/stats/mean.cu index 67931e2eed..61b57ce739 100644 --- a/cpp/test/stats/mean.cu +++ b/cpp/test/stats/mean.cu @@ -95,39 +95,39 @@ class MeanTest : public ::testing::TestWithParam> { // Note: For 1024 samples, 256 experiments, a mean of 1.0 with stddev=1.0, the // measured mean (of a normal distribution) will fall outside of an epsilon of // 0.15 only 4/10000 times. (epsilon of 0.1 will fail 30/100 times) -const std::vector> inputsf = {{0.15f, 1.f, 1024, 32, true, false, 1234ULL}, - {0.15f, 1.f, 1024, 64, true, false, 1234ULL}, - {0.15f, 1.f, 1024, 128, true, false, 1234ULL}, - {0.15f, 1.f, 1024, 256, true, false, 1234ULL}, - {0.15f, -1.f, 1024, 32, false, false, 1234ULL}, - {0.15f, -1.f, 1024, 64, false, false, 1234ULL}, - {0.15f, -1.f, 1024, 128, false, false, 1234ULL}, - {0.15f, -1.f, 1024, 256, false, false, 1234ULL}, - {0.15f, 1.f, 1024, 32, true, true, 1234ULL}, - {0.15f, 1.f, 1024, 64, true, true, 1234ULL}, - {0.15f, 1.f, 1024, 128, true, true, 1234ULL}, - {0.15f, 1.f, 1024, 256, true, true, 1234ULL}, - {0.15f, -1.f, 1024, 32, false, true, 1234ULL}, - {0.15f, -1.f, 1024, 64, false, true, 1234ULL}, - {0.15f, -1.f, 1024, 128, false, true, 1234ULL}, - {0.15f, -1.f, 1024, 256, false, true, 1234ULL}}; - -const std::vector> inputsd = {{0.15, 1.0, 1024, 32, true, false, 1234ULL}, - {0.15, 1.0, 1024, 64, true, false, 1234ULL}, - {0.15, 1.0, 1024, 128, true, false, 1234ULL}, - {0.15, 1.0, 1024, 256, true, false, 1234ULL}, - {0.15, -1.0, 1024, 32, false, false, 1234ULL}, - {0.15, -1.0, 1024, 64, false, false, 1234ULL}, - {0.15, -1.0, 1024, 128, false, false, 1234ULL}, - {0.15, -1.0, 1024, 256, false, false, 1234ULL}, - {0.15, 1.0, 1024, 32, true, true, 1234ULL}, - {0.15, 1.0, 1024, 64, true, true, 1234ULL}, - {0.15, 1.0, 1024, 128, true, true, 1234ULL}, - {0.15, 1.0, 1024, 256, true, true, 1234ULL}, - {0.15, -1.0, 1024, 32, false, true, 1234ULL}, - {0.15, -1.0, 1024, 64, false, true, 1234ULL}, - {0.15, -1.0, 1024, 128, false, true, 1234ULL}, - {0.15, -1.0, 1024, 256, false, true, 1234ULL}}; +const std::vector> inputsf = { + {0.15f, 1.f, 1024, 32, true, false, 1234ULL}, {0.15f, 1.f, 1024, 64, true, false, 1234ULL}, + {0.15f, 1.f, 1024, 128, true, false, 1234ULL}, {0.15f, 1.f, 1024, 256, true, false, 1234ULL}, + {0.15f, -1.f, 1024, 32, false, false, 1234ULL}, {0.15f, -1.f, 1024, 64, false, false, 1234ULL}, + {0.15f, -1.f, 1024, 128, false, false, 1234ULL}, {0.15f, -1.f, 1024, 256, false, false, 1234ULL}, + {0.15f, 1.f, 1024, 32, true, true, 1234ULL}, {0.15f, 1.f, 1024, 64, true, true, 1234ULL}, + {0.15f, 1.f, 1024, 128, true, true, 1234ULL}, {0.15f, 1.f, 1024, 256, true, true, 1234ULL}, + {0.15f, -1.f, 1024, 32, false, true, 1234ULL}, {0.15f, -1.f, 1024, 64, false, true, 1234ULL}, + {0.15f, -1.f, 1024, 128, false, true, 1234ULL}, {0.15f, -1.f, 1024, 256, false, true, 1234ULL}, + {0.15f, -1.f, 1030, 1, false, false, 1234ULL}, {0.15f, -1.f, 1030, 60, true, false, 1234ULL}, + {2.0f, -1.f, 31, 120, false, false, 1234ULL}, {2.0f, -1.f, 1, 130, true, false, 1234ULL}, + {0.15f, -1.f, 1030, 1, false, true, 1234ULL}, {0.15f, -1.f, 1030, 60, true, true, 1234ULL}, + {2.0f, -1.f, 31, 120, false, true, 1234ULL}, {2.0f, -1.f, 1, 130, false, true, 1234ULL}, + {2.0f, -1.f, 1, 1, false, false, 1234ULL}, {2.0f, -1.f, 1, 1, false, true, 1234ULL}, + {2.0f, -1.f, 7, 23, false, false, 1234ULL}, {2.0f, -1.f, 7, 23, false, true, 1234ULL}, + {2.0f, -1.f, 17, 5, false, false, 1234ULL}, {2.0f, -1.f, 17, 5, false, true, 1234ULL}}; + +const std::vector> inputsd = { + {0.15, 1.0, 1024, 32, true, false, 1234ULL}, {0.15, 1.0, 1024, 64, true, false, 1234ULL}, + {0.15, 1.0, 1024, 128, true, false, 1234ULL}, {0.15, 1.0, 1024, 256, true, false, 1234ULL}, + {0.15, -1.0, 1024, 32, false, false, 1234ULL}, {0.15, -1.0, 1024, 64, false, false, 1234ULL}, + {0.15, -1.0, 1024, 128, false, false, 1234ULL}, {0.15, -1.0, 1024, 256, false, false, 1234ULL}, + {0.15, 1.0, 1024, 32, true, true, 1234ULL}, {0.15, 1.0, 1024, 64, true, true, 1234ULL}, + {0.15, 1.0, 1024, 128, true, true, 1234ULL}, {0.15, 1.0, 1024, 256, true, true, 1234ULL}, + {0.15, -1.0, 1024, 32, false, true, 1234ULL}, {0.15, -1.0, 1024, 64, false, true, 1234ULL}, + {0.15, -1.0, 1024, 128, false, true, 1234ULL}, {0.15, -1.0, 1024, 256, false, true, 1234ULL}, + {0.15, -1.0, 1030, 1, false, false, 1234ULL}, {0.15, -1.0, 1030, 60, true, false, 1234ULL}, + {2.0, -1.0, 31, 120, false, false, 1234ULL}, {2.0, -1.0, 1, 130, true, false, 1234ULL}, + {0.15, -1.0, 1030, 1, false, true, 1234ULL}, {0.15, -1.0, 1030, 60, true, true, 1234ULL}, + {2.0, -1.0, 31, 120, false, true, 1234ULL}, {2.0, -1.0, 1, 130, false, true, 1234ULL}, + {2.0, -1.0, 1, 1, false, false, 1234ULL}, {2.0, -1.0, 1, 1, false, true, 1234ULL}, + {2.0, -1.0, 7, 23, false, false, 1234ULL}, {2.0, -1.0, 7, 23, false, true, 1234ULL}, + {2.0, -1.0, 17, 5, false, false, 1234ULL}, {2.0, -1.0, 17, 5, false, true, 1234ULL}}; typedef MeanTest MeanTestF; TEST_P(MeanTestF, Result) diff --git a/cpp/test/stats/minmax.cu b/cpp/test/stats/minmax.cu index 7563cb12be..fd909ebb90 100644 --- a/cpp/test/stats/minmax.cu +++ b/cpp/test/stats/minmax.cu @@ -145,45 +145,33 @@ class MinMaxTest : public ::testing::TestWithParam> { rmm::device_uvector minmax_ref; }; -const std::vector> inputsf = {{0.00001f, 1024, 32, 1234ULL}, - {0.00001f, 1024, 64, 1234ULL}, - {0.00001f, 1024, 128, 1234ULL}, - {0.00001f, 1024, 256, 1234ULL}, - {0.00001f, 1024, 512, 1234ULL}, - {0.00001f, 1024, 1024, 1234ULL}, - {0.00001f, 4096, 32, 1234ULL}, - {0.00001f, 4096, 64, 1234ULL}, - {0.00001f, 4096, 128, 1234ULL}, - {0.00001f, 4096, 256, 1234ULL}, - {0.00001f, 4096, 512, 1234ULL}, - {0.00001f, 4096, 1024, 1234ULL}, - {0.00001f, 8192, 32, 1234ULL}, - {0.00001f, 8192, 64, 1234ULL}, - {0.00001f, 8192, 128, 1234ULL}, - {0.00001f, 8192, 256, 1234ULL}, - {0.00001f, 8192, 512, 1234ULL}, - {0.00001f, 8192, 1024, 1234ULL}, - {0.00001f, 1024, 8192, 1234ULL}}; - -const std::vector> inputsd = {{0.0000001, 1024, 32, 1234ULL}, - {0.0000001, 1024, 64, 1234ULL}, - {0.0000001, 1024, 128, 1234ULL}, - {0.0000001, 1024, 256, 1234ULL}, - {0.0000001, 1024, 512, 1234ULL}, - {0.0000001, 1024, 1024, 1234ULL}, - {0.0000001, 4096, 32, 1234ULL}, - {0.0000001, 4096, 64, 1234ULL}, - {0.0000001, 4096, 128, 1234ULL}, - {0.0000001, 4096, 256, 1234ULL}, - {0.0000001, 4096, 512, 1234ULL}, - {0.0000001, 4096, 1024, 1234ULL}, - {0.0000001, 8192, 32, 1234ULL}, - {0.0000001, 8192, 64, 1234ULL}, - {0.0000001, 8192, 128, 1234ULL}, - {0.0000001, 8192, 256, 1234ULL}, - {0.0000001, 8192, 512, 1234ULL}, - {0.0000001, 8192, 1024, 1234ULL}, - {0.0000001, 1024, 8192, 1234ULL}}; +const std::vector> inputsf = { + {0.00001f, 1024, 32, 1234ULL}, {0.00001f, 1024, 64, 1234ULL}, {0.00001f, 1024, 128, 1234ULL}, + {0.00001f, 1024, 256, 1234ULL}, {0.00001f, 1024, 512, 1234ULL}, {0.00001f, 1024, 1024, 1234ULL}, + {0.00001f, 4096, 32, 1234ULL}, {0.00001f, 4096, 64, 1234ULL}, {0.00001f, 4096, 128, 1234ULL}, + {0.00001f, 4096, 256, 1234ULL}, {0.00001f, 4096, 512, 1234ULL}, {0.00001f, 4096, 1024, 1234ULL}, + {0.00001f, 8192, 32, 1234ULL}, {0.00001f, 8192, 64, 1234ULL}, {0.00001f, 8192, 128, 1234ULL}, + {0.00001f, 8192, 256, 1234ULL}, {0.00001f, 8192, 512, 1234ULL}, {0.00001f, 8192, 1024, 1234ULL}, + {0.00001f, 1024, 8192, 1234ULL}, {0.00001f, 1023, 5, 1234ULL}, {0.00001f, 1025, 30, 1234ULL}, + {0.00001f, 2047, 65, 1234ULL}, {0.00001f, 2049, 22, 1234ULL}, {0.00001f, 31, 644, 1234ULL}, + {0.00001f, 33, 999, 1234ULL}, {0.00001f, 1, 1, 1234ULL}, {0.00001f, 7, 23, 1234ULL}, + {0.00001f, 17, 5, 1234ULL}}; + +const std::vector> inputsd = { + {0.0000001, 1024, 32, 1234ULL}, {0.0000001, 1024, 64, 1234ULL}, + {0.0000001, 1024, 128, 1234ULL}, {0.0000001, 1024, 256, 1234ULL}, + {0.0000001, 1024, 512, 1234ULL}, {0.0000001, 1024, 1024, 1234ULL}, + {0.0000001, 4096, 32, 1234ULL}, {0.0000001, 4096, 64, 1234ULL}, + {0.0000001, 4096, 128, 1234ULL}, {0.0000001, 4096, 256, 1234ULL}, + {0.0000001, 4096, 512, 1234ULL}, {0.0000001, 4096, 1024, 1234ULL}, + {0.0000001, 8192, 32, 1234ULL}, {0.0000001, 8192, 64, 1234ULL}, + {0.0000001, 8192, 128, 1234ULL}, {0.0000001, 8192, 256, 1234ULL}, + {0.0000001, 8192, 512, 1234ULL}, {0.0000001, 8192, 1024, 1234ULL}, + {0.0000001, 1024, 8192, 1234ULL}, {0.0000001, 1023, 5, 1234ULL}, + {0.0000001, 1025, 30, 1234ULL}, {0.0000001, 2047, 65, 1234ULL}, + {0.0000001, 2049, 22, 1234ULL}, {0.0000001, 31, 644, 1234ULL}, + {0.0000001, 33, 999, 1234ULL}, {0.0000001, 1, 1, 1234ULL}, + {0.0000001, 7, 23, 1234ULL}, {0.0000001, 17, 5, 1234ULL}}; typedef MinMaxTest MinMaxTestF; TEST_P(MinMaxTestF, Result) diff --git a/cpp/test/stats/stddev.cu b/cpp/test/stats/stddev.cu index cf57d3a923..641621c1c6 100644 --- a/cpp/test/stats/stddev.cu +++ b/cpp/test/stats/stddev.cu @@ -141,7 +141,19 @@ const std::vector> inputsf = { {0.1f, -1.f, 2.f, 1024, 32, false, true, 1234ULL}, {0.1f, -1.f, 2.f, 1024, 64, false, true, 1234ULL}, {0.1f, -1.f, 2.f, 1024, 128, false, true, 1234ULL}, - {0.1f, -1.f, 2.f, 1024, 256, false, true, 1234ULL}}; + {0.1f, -1.f, 2.f, 1024, 256, false, true, 1234ULL}, + {0.1f, -1.f, 2.f, 1099, 97, false, false, 1234ULL}, + {0.1f, -1.f, 2.f, 1022, 694, true, false, 1234ULL}, + {0.5f, -1.f, 2.f, 31, 1, true, true, 1234ULL}, + {1.f, -1.f, 2.f, 1, 257, false, true, 1234ULL}, + {0.5f, -1.f, 2.f, 31, 1, false, false, 1234ULL}, + {1.f, -1.f, 2.f, 1, 257, true, false, 1234ULL}, + {1.f, -1.f, 2.f, 1, 1, false, false, 1234ULL}, + {1.f, -1.f, 2.f, 7, 23, false, false, 1234ULL}, + {1.f, -1.f, 2.f, 17, 5, false, false, 1234ULL}, + {1.f, -1.f, 2.f, 1, 1, false, true, 1234ULL}, + {1.f, -1.f, 2.f, 7, 23, false, true, 1234ULL}, + {1.f, -1.f, 2.f, 17, 5, false, true, 1234ULL}}; const std::vector> inputsd = { {0.1, 1.0, 2.0, 1024, 32, true, false, 1234ULL}, @@ -159,13 +171,33 @@ const std::vector> inputsd = { {0.1, -1.0, 2.0, 1024, 32, false, true, 1234ULL}, {0.1, -1.0, 2.0, 1024, 64, false, true, 1234ULL}, {0.1, -1.0, 2.0, 1024, 128, false, true, 1234ULL}, - {0.1, -1.0, 2.0, 1024, 256, false, true, 1234ULL}}; + {0.1, -1.0, 2.0, 1024, 256, false, true, 1234ULL}, + {0.1, -1.0, 2.0, 1099, 97, false, false, 1234ULL}, + {0.1, -1.0, 2.0, 1022, 694, true, false, 1234ULL}, + {0.5, -1.0, 2.0, 31, 1, true, true, 1234ULL}, + {1.0, -1.0, 2.0, 1, 257, false, true, 1234ULL}, + {0.5, -1.0, 2.0, 31, 1, false, false, 1234ULL}, + {1.0, -1.0, 2.0, 1, 257, true, false, 1234ULL}, + {1.0, -1.0, 2.0, 1, 1, false, false, 1234ULL}, + {1.0, -1.0, 2.0, 7, 23, false, false, 1234ULL}, + {1.0, -1.0, 2.0, 17, 5, false, false, 1234ULL}, + {1.0, -1.0, 2.0, 1, 1, false, true, 1234ULL}, + {1.0, -1.0, 2.0, 7, 23, false, true, 1234ULL}, + {1.0, -1.0, 2.0, 17, 5, false, true, 1234ULL}}; typedef StdDevTest StdDevTestF; TEST_P(StdDevTestF, Result) { - ASSERT_TRUE(devArrMatch( - params.stddev, stddev_act.data(), params.cols, CompareApprox(params.tolerance), stream)); + if (params.rows == 1) { + ASSERT_TRUE(devArrMatch( + float(0), stddev_act.data(), params.cols, CompareApprox(params.tolerance), stream)); + } else { + ASSERT_TRUE(devArrMatch(params.stddev, + stddev_act.data(), + params.cols, + CompareApprox(params.tolerance), + stream)); + } ASSERT_TRUE(devArrMatch(stddev_act.data(), vars_act.data(), @@ -177,11 +209,16 @@ TEST_P(StdDevTestF, Result) typedef StdDevTest StdDevTestD; TEST_P(StdDevTestD, Result) { - ASSERT_TRUE(devArrMatch(params.stddev, - stddev_act.data(), - params.cols, - CompareApprox(params.tolerance), - stream)); + if (params.rows == 1) { + ASSERT_TRUE(devArrMatch( + double(0), stddev_act.data(), params.cols, CompareApprox(params.tolerance), stream)); + } else { + ASSERT_TRUE(devArrMatch(params.stddev, + stddev_act.data(), + params.cols, + CompareApprox(params.tolerance), + stream)); + } ASSERT_TRUE(devArrMatch(stddev_act.data(), vars_act.data(), diff --git a/cpp/test/stats/sum.cu b/cpp/test/stats/sum.cu index 5a549f8ba4..bf2aa44a2c 100644 --- a/cpp/test/stats/sum.cu +++ b/cpp/test/stats/sum.cu @@ -33,7 +33,8 @@ template struct SumInputs { T tolerance; int rows, cols; - unsigned long long int seed; + bool rowMajor; + T value = T(1); }; template @@ -56,20 +57,34 @@ class SumTest : public ::testing::TestWithParam> { } protected: - void SetUp() override + void runTest() { int len = rows * cols; - T data_h[len]; + std::vector data_h(len); for (int i = 0; i < len; i++) { - data_h[i] = T(1); + data_h[i] = T(params.value); } - raft::update_device(data.data(), data_h, len, stream); - sum(handle, - raft::make_device_matrix_view(data.data(), rows, cols), - raft::make_device_vector_view(sum_act.data(), cols)); + raft::update_device(data.data(), data_h.data(), len, stream); + + if (params.rowMajor) { + using layout = raft::row_major; + sum(handle, + raft::make_device_matrix_view(data.data(), rows, cols), + raft::make_device_vector_view(sum_act.data(), cols)); + } else { + using layout = raft::col_major; + sum(handle, + raft::make_device_matrix_view(data.data(), rows, cols), + raft::make_device_vector_view(sum_act.data(), cols)); + } resource::sync_stream(handle, stream); + + double expected = double(params.rows) * params.value; + + ASSERT_TRUE(raft::devArrMatch( + T(expected), sum_act.data(), params.cols, raft::CompareApprox(params.tolerance))); } protected: @@ -81,27 +96,49 @@ class SumTest : public ::testing::TestWithParam> { rmm::device_uvector data, sum_act; }; -const std::vector> inputsf = { - {0.05f, 4, 5, 1234ULL}, {0.05f, 1024, 32, 1234ULL}, {0.05f, 1024, 256, 1234ULL}}; - -const std::vector> inputsd = {{0.05, 1024, 32, 1234ULL}, - {0.05, 1024, 256, 1234ULL}}; +const std::vector> inputsf = {{0.0001f, 4, 5, true, 1}, + {0.0001f, 1024, 32, true, 1}, + {0.0001f, 1024, 256, true, 1}, + {0.0001f, 100000000, 1, true, 0.001}, + {0.0001f, 1, 30, true, 0.001}, + {0.0001f, 1, 1, true, 0.001}, + {0.0001f, 17, 5, true, 0.001}, + {0.0001f, 7, 23, true, 0.001}, + {0.0001f, 3, 97, true, 0.001}, + {0.0001f, 4, 5, false, 1}, + {0.0001f, 1024, 32, false, 1}, + {0.0001f, 1024, 256, false, 1}, + {0.0001f, 100000000, 1, false, 0.001}, + {0.0001f, 1, 30, false, 0.001}, + {0.0001f, 1, 1, false, 0.001}, + {0.0001f, 17, 5, false, 0.001}, + {0.0001f, 7, 23, false, 0.001}, + {0.0001f, 3, 97, false, 0.001}}; + +const std::vector> inputsd = {{0.000001, 1024, 32, true, 1}, + {0.000001, 1024, 256, true, 1}, + {0.000001, 1024, 256, true, 1}, + {0.000001, 100000000, 1, true, 0.001}, + {0.000001, 1, 30, true, 0.0001}, + {0.000001, 1, 1, true, 0.0001}, + {0.000001, 17, 5, true, 0.0001}, + {0.000001, 7, 23, true, 0.0001}, + {0.000001, 3, 97, true, 0.0001}, + {0.000001, 1024, 32, false, 1}, + {0.000001, 1024, 256, false, 1}, + {0.000001, 1024, 256, false, 1}, + {0.000001, 100000000, 1, false, 0.001}, + {0.000001, 1, 30, false, 0.0001}, + {0.000001, 1, 1, false, 0.0001}, + {0.000001, 17, 5, false, 0.0001}, + {0.000001, 7, 23, false, 0.0001}, + {0.000001, 3, 97, false, 0.0001}}; typedef SumTest SumTestF; -TEST_P(SumTestF, Result) -{ - ASSERT_TRUE(raft::devArrMatch( - float(params.rows), sum_act.data(), params.cols, raft::CompareApprox(params.tolerance))); -} - typedef SumTest SumTestD; -TEST_P(SumTestD, Result) -{ - ASSERT_TRUE(raft::devArrMatch(double(params.rows), - sum_act.data(), - params.cols, - raft::CompareApprox(params.tolerance))); -} + +TEST_P(SumTestF, Result) { runTest(); } +TEST_P(SumTestD, Result) { runTest(); } INSTANTIATE_TEST_CASE_P(SumTests, SumTestF, ::testing::ValuesIn(inputsf));