Skip to content

Commit

Permalink
Final fixes to unit tests of port to Alpaka
Browse files Browse the repository at this point in the history
Reinstates error messages instead of silent call to `REQUIRE(!devices.empty());`
Adds maybe_unused attirbut for variable used in assert.
  • Loading branch information
ericcano committed Jan 23, 2024
1 parent 9af4710 commit e03dde3
Showing 12 changed files with 308 additions and 219 deletions.
2 changes: 1 addition & 1 deletion HeterogeneousCore/AlpakaInterface/interface/VecArray.h
Original file line number Diff line number Diff line change
@@ -42,7 +42,7 @@ namespace cms::alpakatools {
}
}

inline constexpr T cont& back() const {
inline constexpr T const &back() const {
if (m_size > 0) {
return m_data[m_size - 1];
} else
206 changes: 113 additions & 93 deletions HeterogeneousCore/AlpakaInterface/interface/radixSort.h

Large diffs are not rendered by default.

6 changes: 6 additions & 0 deletions HeterogeneousCore/AlpakaInterface/interface/workdivision.h
Original file line number Diff line number Diff line change
@@ -701,6 +701,12 @@ namespace cms::alpakatools {
stride_{alpaka::getWorkDiv<alpaka::Block, alpaka::Threads>(acc)[0u] * elements_},
extent_{extent} {}

ALPAKA_FN_ACC inline independent_group_elements(TAcc const& acc, Idx first, Idx extent)
: elements_{alpaka::getWorkDiv<alpaka::Thread, alpaka::Elems>(acc)[0u]},
thread_{alpaka::getIdx<alpaka::Block, alpaka::Threads>(acc)[0u] * elements_ + first},
stride_{alpaka::getWorkDiv<alpaka::Block, alpaka::Threads>(acc)[0u] * elements_},
extent_{extent} {}

class const_iterator;
using iterator = const_iterator;

1 change: 0 additions & 1 deletion HeterogeneousCore/AlpakaInterface/test/BuildFile.xml
Original file line number Diff line number Diff line change
@@ -58,7 +58,6 @@

<bin name="alpakaTestSimpleVector" file="alpaka/testSimpleVector.dev.cc">
<use name="alpaka"/>
<use name="catch2"/>
<use name="HeterogeneousCore/AlpakaInterface"/>
<flags ALPAKA_BACKENDS="1"/>
</bin>
Original file line number Diff line number Diff line change
@@ -42,7 +42,12 @@ struct finalize {
TEST_CASE("Standard checks of " ALPAKA_TYPE_ALIAS_NAME(alpakaTestAtomicPair), s_tag) {
SECTION("AtomicPairCounter") {
auto const &devices = cms::alpakatools::devices<Platform>();
REQUIRE(!devices.empty());
if (devices.empty()) {
std::cout << "No devices available on the platform " << EDM_STRINGIZE(ALPAKA_ACCELERATOR_NAMESPACE)
<< ", the test will be skipped.\n";
REQUIRE(not devices.empty());
}

// run the test on each device
for (auto const &device : devices) {
std::cout << "Test AtomicPairCounter on " << alpaka::getName(device) << '\n';
Original file line number Diff line number Diff line change
@@ -93,7 +93,7 @@ struct mykernel {
#ifndef NDEBUG
auto b0 = Hist::bin(v[j]);
#endif
int tot = 0;
[[maybe_unused]] int tot = 0;
auto ftest = [&](unsigned int k) {
ALPAKA_ASSERT_OFFLOAD(k < N);
++tot;
110 changes: 58 additions & 52 deletions HeterogeneousCore/AlpakaInterface/test/alpaka/testOneRadixSort.dev.cc
Original file line number Diff line number Diff line change
@@ -64,7 +64,8 @@ ALPAKA_FN_HOST_ACC T truncate(T const& t) {
namespace {
struct testKernel {
template <typename TAcc>
ALPAKA_FN_ACC void operator() (const TAcc &acc, FLOAT* gpu_input, int* gpu_product, int elements, bool doPrint) const {
ALPAKA_FN_ACC void operator()(
const TAcc& acc, FLOAT* gpu_input, int* gpu_product, int elements, bool doPrint) const {
//size_t firstElement = threadIdx.x + blockIdx.x * blockDim.x; // This is going to be the track index
//size_t gridSize = blockDim.x * gridDim.x;
bool threadZero = !alpaka::getIdx<alpaka::Grid, alpaka::Threads>(acc)[0u];
@@ -76,15 +77,15 @@ namespace {
assert(0 == blocksIdx);
assert(elements <= 2048);

auto &order = alpaka::declareSharedVar<uint16_t[2048], __COUNTER__>(acc);
auto &sws = alpaka::declareSharedVar<uint16_t[2048], __COUNTER__>(acc);
auto &z = alpaka::declareSharedVar<float[2048], __COUNTER__>(acc);
auto &iz = alpaka::declareSharedVar<int[2048], __COUNTER__>(acc);
// __shared__ uint16_t order[2048];
// __shared__ uint16_t sws[2048];
// __shared__ float z[2048];
// __shared__ int iz[2048];
for (auto itrack: elements_with_stride(acc, elements)) {
auto& order = alpaka::declareSharedVar<uint16_t[2048], __COUNTER__>(acc);
auto& sws = alpaka::declareSharedVar<uint16_t[2048], __COUNTER__>(acc);
auto& z = alpaka::declareSharedVar<float[2048], __COUNTER__>(acc);
auto& iz = alpaka::declareSharedVar<int[2048], __COUNTER__>(acc);
// __shared__ uint16_t order[2048];
// __shared__ uint16_t sws[2048];
// __shared__ float z[2048];
// __shared__ int iz[2048];
for (auto itrack : elements_with_stride(acc, elements)) {
z[itrack] = gpu_input[itrack];
iz[itrack] = 10000 * gpu_input[itrack];
// order[itrack] = itrack;
@@ -94,7 +95,7 @@ namespace {
alpaka::syncBlockThreads(acc);

//verify
for (auto itrack: elements_with_stride(acc, elements - 1)) {
for (auto itrack : elements_with_stride(acc, elements - 1)) {
auto ntrack = order[itrack];
auto mtrack = order[itrack + 1];
assert(truncate<2>(z[ntrack]) <= truncate<2>(z[mtrack]));
@@ -122,7 +123,7 @@ namespace {
radixSort<TAcc, int, 4>(acc, iz, order, sws, elements);
alpaka::syncBlockThreads(acc);

for (auto itrack: elements_with_stride(acc, elements - 1)) {
for (auto itrack : elements_with_stride(acc, elements - 1)) {
auto ntrack = order[itrack];
auto mtrack = order[itrack + 1];
assert(iz[ntrack] <= iz[mtrack]);
@@ -146,11 +147,12 @@ namespace {
}
};

void testWrapper(Queue & queue, FLOAT* gpu_input, int* gpu_product, int elements, bool doPrint) {
void testWrapper(Queue& queue, FLOAT* gpu_input, int* gpu_product, int elements, bool doPrint) {
auto blockSize = 512; // somewhat arbitrary
auto gridSize = 1; // round up to cover the sample size
const auto workdiv = make_workdiv<Acc1D>(gridSize, blockSize);
alpaka::enqueue(queue, alpaka::createTaskKernel<Acc1D>(workdiv, testKernel(), gpu_input, gpu_product, elements, doPrint));
alpaka::enqueue(queue,
alpaka::createTaskKernel<Acc1D>(workdiv, testKernel(), gpu_input, gpu_product, elements, doPrint));
alpaka::wait(queue);
}
} // namespace
@@ -165,50 +167,54 @@ int main() {
<< ", the test will be skipped.\n";
return 0;
}

// run the test on each device
for (auto const& device : devices) {
Queue queue(device);
// FLOAT* gpu_input;
// int* gpu_product;
// FLOAT* gpu_input;
// int* gpu_product;

int nmax = 4 * 260;
auto gpu_input_h = cms::alpakatools::make_host_buffer<FLOAT[]>(queue, nmax);
auto i = gpu_input_h.data();
for (auto v: {
30.0, 30.0, -4.4, -7.1860761642, -6.6870317459, 1.8010582924, 2.2535820007, 2.2666890621,
2.2677690983, 2.2794606686, 2.2802586555, 2.2821085453, 2.2852313519, 2.2877883911, 2.2946476936, 2.2960267067,
2.3006286621, 2.3245604038, 2.6755006313, 2.7229132652, 2.783257246, 2.8440306187, 2.9017834663, 2.9252648354,
2.9254128933, 2.927520752, 2.9422419071, 2.9453969002, 2.9457902908, 2.9465973377, 2.9492356777, 2.9573802948,
2.9575133324, 2.9575304985, 2.9586606026, 2.9605507851, 2.9622797966, 2.9625515938, 2.9641008377, 2.9646151066,
2.9676523209, 2.9708273411, 2.974111557, 2.9742531776, 2.9772830009, 2.9877333641, 2.9960610867, 3.013969183,
3.0187871456, 3.0379793644, 3.0407221317, 3.0415751934, 3.0470511913, 3.0560519695, 3.0592908859, 3.0599737167,
3.0607066154, 3.0629007816, 3.0632448196, 3.0633215904, 3.0643932819, 3.0645000935, 3.0666446686, 3.068046093,
3.0697011948, 3.0717656612, 3.0718104839, 3.0718348026, 3.0733406544, 3.0738227367, 3.0738801956, 3.0738828182,
3.0744686127, 3.0753741264, 3.0758397579, 3.0767207146, 3.0773906708, 3.0778541565, 3.0780284405, 3.0780889988,
3.0782799721, 3.0789675713, 3.0792205334, 3.0793278217, 3.0795567036, 3.0797944069, 3.0806643963, 3.0809247494,
3.0815284252, 3.0817306042, 3.0819730759, 3.0820026398, 3.0838682652, 3.084009409, 3.0848178864, 3.0853257179,
3.0855510235, 3.0856611729, 3.0873703957, 3.0884618759, 3.0891149044, 3.0893011093, 3.0895674229, 3.0901503563,
3.0903317928, 3.0912668705, 3.0920717716, 3.0954346657, 3.096424818, 3.0995628834, 3.1001036167, 3.1173279285,
3.1185023785, 3.1195163727, 3.1568386555, 3.1675374508, 3.1676850319, 3.1886672974, 3.3769197464, 3.3821125031,
3.4780933857, 3.4822063446, 3.4989323616, 3.5076274872, 3.5225863457, 3.5271244049, 3.5298995972, 3.5417425632,
3.5444457531, 3.5465917587, 3.5473103523, 3.5480232239, 3.5526945591, 3.5531234741, 3.5538012981, 3.5544877052,
3.5547749996, 3.5549693108, 3.5550665855, 3.5558729172, 3.5560717583, 3.5560848713, 3.5584278107, 3.558681488,
3.5587313175, 3.5592217445, 3.559384346, 3.5604712963, 3.5634038448, 3.563803196, 3.564593792, 3.5660364628,
3.5683133602, 3.5696356297, 3.569729805, 3.5740811825, 3.5757565498, 3.5760207176, 3.5760478973, 3.5836098194,
3.5839796066, 3.5852358341, 3.5901627541, 3.6141786575, 3.6601481438, 3.7187042236, 3.9741659164, 4.4111995697,
4.5337572098, 4.6292567253, 4.6748633385, 4.6806583405, 4.6868157387, 4.6868577003, 4.6879930496, 4.6888813972,
4.6910686493, 4.6925001144, 4.6957530975, 4.698094368, 4.6997032166, 4.7017259598, 4.7020640373, 4.7024269104,
4.7036352158, 4.7038679123, 4.7042069435, 4.7044086456, 4.7044372559, 4.7050771713, 4.7055773735, 4.7060651779,
4.7062759399, 4.7065420151, 4.70657444, 4.7066287994, 4.7066788673, 4.7067341805, 4.7072944641, 4.7074551582,
4.7075614929, 4.7075891495, 4.7076044083, 4.7077374458, 4.7080879211, 4.70819664, 4.7086658478, 4.708937645,
4.7092385292, 4.709479332, 4.7095656395, 4.7100076675, 4.7102108002, 4.7104525566, 4.7105507851, 4.71118927,
4.7113513947, 4.7115578651, 4.7116270065, 4.7116751671, 4.7117190361, 4.7117333412, 4.7117910385, 4.7119007111,
4.7120013237, 4.712003231, 4.712044239, 4.7122926712, 4.7135767937, 4.7143669128, 4.7145690918, 4.7148418427,
4.7149815559, 4.7159647942, 4.7161884308, 4.7177276611, 4.717815876, 4.718059063, 4.7188801765, 4.7190728188,
4.7199850082, 4.7213058472, 4.7239775658, 4.7243933678, 4.7243990898, 4.7273659706, 4.7294125557, 4.7296204567,
4.7325615883, 4.7356877327, 4.740146637, 4.742254734, 4.7433848381, 4.7454957962, 4.7462964058, 4.7692604065,
4.7723139628, 4.774812736, 4.8577151299, 4.890037536}) {
for (auto v : {30.0, 30.0, -4.4, -7.1860761642, -6.6870317459, 1.8010582924, 2.2535820007,
2.2666890621, 2.2677690983, 2.2794606686, 2.2802586555, 2.2821085453, 2.2852313519, 2.2877883911,
2.2946476936, 2.2960267067, 2.3006286621, 2.3245604038, 2.6755006313, 2.7229132652, 2.783257246,
2.8440306187, 2.9017834663, 2.9252648354, 2.9254128933, 2.927520752, 2.9422419071, 2.9453969002,
2.9457902908, 2.9465973377, 2.9492356777, 2.9573802948, 2.9575133324, 2.9575304985, 2.9586606026,
2.9605507851, 2.9622797966, 2.9625515938, 2.9641008377, 2.9646151066, 2.9676523209, 2.9708273411,
2.974111557, 2.9742531776, 2.9772830009, 2.9877333641, 2.9960610867, 3.013969183, 3.0187871456,
3.0379793644, 3.0407221317, 3.0415751934, 3.0470511913, 3.0560519695, 3.0592908859, 3.0599737167,
3.0607066154, 3.0629007816, 3.0632448196, 3.0633215904, 3.0643932819, 3.0645000935, 3.0666446686,
3.068046093, 3.0697011948, 3.0717656612, 3.0718104839, 3.0718348026, 3.0733406544, 3.0738227367,
3.0738801956, 3.0738828182, 3.0744686127, 3.0753741264, 3.0758397579, 3.0767207146, 3.0773906708,
3.0778541565, 3.0780284405, 3.0780889988, 3.0782799721, 3.0789675713, 3.0792205334, 3.0793278217,
3.0795567036, 3.0797944069, 3.0806643963, 3.0809247494, 3.0815284252, 3.0817306042, 3.0819730759,
3.0820026398, 3.0838682652, 3.084009409, 3.0848178864, 3.0853257179, 3.0855510235, 3.0856611729,
3.0873703957, 3.0884618759, 3.0891149044, 3.0893011093, 3.0895674229, 3.0901503563, 3.0903317928,
3.0912668705, 3.0920717716, 3.0954346657, 3.096424818, 3.0995628834, 3.1001036167, 3.1173279285,
3.1185023785, 3.1195163727, 3.1568386555, 3.1675374508, 3.1676850319, 3.1886672974, 3.3769197464,
3.3821125031, 3.4780933857, 3.4822063446, 3.4989323616, 3.5076274872, 3.5225863457, 3.5271244049,
3.5298995972, 3.5417425632, 3.5444457531, 3.5465917587, 3.5473103523, 3.5480232239, 3.5526945591,
3.5531234741, 3.5538012981, 3.5544877052, 3.5547749996, 3.5549693108, 3.5550665855, 3.5558729172,
3.5560717583, 3.5560848713, 3.5584278107, 3.558681488, 3.5587313175, 3.5592217445, 3.559384346,
3.5604712963, 3.5634038448, 3.563803196, 3.564593792, 3.5660364628, 3.5683133602, 3.5696356297,
3.569729805, 3.5740811825, 3.5757565498, 3.5760207176, 3.5760478973, 3.5836098194, 3.5839796066,
3.5852358341, 3.5901627541, 3.6141786575, 3.6601481438, 3.7187042236, 3.9741659164, 4.4111995697,
4.5337572098, 4.6292567253, 4.6748633385, 4.6806583405, 4.6868157387, 4.6868577003, 4.6879930496,
4.6888813972, 4.6910686493, 4.6925001144, 4.6957530975, 4.698094368, 4.6997032166, 4.7017259598,
4.7020640373, 4.7024269104, 4.7036352158, 4.7038679123, 4.7042069435, 4.7044086456, 4.7044372559,
4.7050771713, 4.7055773735, 4.7060651779, 4.7062759399, 4.7065420151, 4.70657444, 4.7066287994,
4.7066788673, 4.7067341805, 4.7072944641, 4.7074551582, 4.7075614929, 4.7075891495, 4.7076044083,
4.7077374458, 4.7080879211, 4.70819664, 4.7086658478, 4.708937645, 4.7092385292, 4.709479332,
4.7095656395, 4.7100076675, 4.7102108002, 4.7104525566, 4.7105507851, 4.71118927, 4.7113513947,
4.7115578651, 4.7116270065, 4.7116751671, 4.7117190361, 4.7117333412, 4.7117910385, 4.7119007111,
4.7120013237, 4.712003231, 4.712044239, 4.7122926712, 4.7135767937, 4.7143669128, 4.7145690918,
4.7148418427, 4.7149815559, 4.7159647942, 4.7161884308, 4.7177276611, 4.717815876, 4.718059063,
4.7188801765, 4.7190728188, 4.7199850082, 4.7213058472, 4.7239775658, 4.7243933678, 4.7243990898,
4.7273659706, 4.7294125557, 4.7296204567, 4.7325615883, 4.7356877327, 4.740146637, 4.742254734,
4.7433848381, 4.7454957962, 4.7462964058, 4.7692604065, 4.7723139628, 4.774812736, 4.8577151299,
4.890037536}) {
*(i++) = v;
}
auto input = gpu_input_h.data();
@@ -219,7 +225,7 @@ int main() {
}
auto gpu_input_d = cms::alpakatools::make_device_buffer<FLOAT[]>(queue, nmax);
//cudaCheck(cudaMalloc(&gpu_input, sizeof(FLOAT) * nmax));
// cudaCheck(cudaMalloc(&gpu_product, sizeof(int) * nmax));
// cudaCheck(cudaMalloc(&gpu_product, sizeof(int) * nmax));
auto gpu_product_d = cms::alpakatools::make_device_buffer<int[]>(queue, nmax);
// copy the input data to the GPU
alpaka::memcpy(queue, gpu_input_d, gpu_input_h);
Loading

0 comments on commit e03dde3

Please sign in to comment.