Skip to content

Commit

Permalink
SPMV tpl fixes, cusparse workaround (#2152)
Browse files Browse the repository at this point in the history
* SPMV tpl fixes, workaround

* Avoid possible integer conversion warnings

* Document cusparseSpMM algos that were tested
  • Loading branch information
brian-kelley authored Mar 26, 2024
1 parent c435777 commit ccc4be5
Show file tree
Hide file tree
Showing 3 changed files with 87 additions and 32 deletions.
9 changes: 8 additions & 1 deletion sparse/tpls/KokkosSparse_spmv_mv_tpl_spec_avail.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,14 @@ struct spmv_mv_tpl_spec_avail {
non-transpose that produces incorrect result. This is cusparse distributed with
CUDA 10.1.243. The bug seems to be resolved by CUSPARSE 10301 (present by
CUDA 10.2.89) */
#if defined(CUSPARSE_VERSION) && (10301 <= CUSPARSE_VERSION)

/* cusparseSpMM also produces incorrect results for some inputs in CUDA 11.6.1
 * (CUSPARSE_VERSION 11702).
* ALG1 and ALG3 produce completely incorrect results for one set of inputs.
* ALG2 works for that case, but has low numerical accuracy in another case.
*/
#if defined(CUSPARSE_VERSION) && (10301 <= CUSPARSE_VERSION) && \
(CUSPARSE_VERSION != 11702)
KOKKOSSPARSE_SPMV_MV_TPL_SPEC_AVAIL_CUSPARSE(double, int, int,
Kokkos::LayoutLeft,
Kokkos::LayoutLeft,
Expand Down
25 changes: 19 additions & 6 deletions sparse/tpls/KokkosSparse_spmv_mv_tpl_spec_decl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,12 @@

/* CUSPARSE_VERSION < 10301 either doesn't have cusparseSpMM
or the non-transpose version produces incorrect results.
Version 11702 corresponds to CUDA 11.6.1, which also produces incorrect
results. 11701 (CUDA 11.6.0) is OK.
*/
#if defined(CUSPARSE_VERSION) && (10301 <= CUSPARSE_VERSION)
#if defined(CUSPARSE_VERSION) && (10301 <= CUSPARSE_VERSION) && \
(CUSPARSE_VERSION != 11702)
#include "cusparse.h"
#include "KokkosSparse_Utils_cusparse.hpp"

Expand Down Expand Up @@ -63,9 +67,14 @@ inline cudaDataType compute_type<Kokkos::Experimental::half_t>() {
*/
template <typename ViewType, std::enable_if_t<ViewType::rank == 2, bool> = true>
cusparseDnMatDescr_t make_cusparse_dn_mat_descr_t(ViewType &view) {
const int64_t rows = view.extent(0);
const int64_t cols = view.extent(1);
const int64_t ld = view.extent(0);
// If the view is LayoutRight, we still need to create descr as column-major
// but it should be an implicit transpose, meaning dimensions and strides are
// swapped
bool transpose =
std::is_same_v<typename ViewType::array_layout, Kokkos::LayoutRight>;
const size_t rows = transpose ? view.extent(1) : view.extent(0);
const size_t cols = transpose ? view.extent(0) : view.extent(1);
const size_t ld = transpose ? view.stride(0) : view.stride(1);

// cusparseCreateCsr notes it is safe to const_cast this away for input
// pointers to a descriptor as long as that descriptor is not an output
Expand All @@ -83,8 +92,9 @@ cusparseDnMatDescr_t make_cusparse_dn_mat_descr_t(ViewType &view) {
const cusparseOrder_t order = CUSPARSE_ORDER_COL;

cusparseDnMatDescr_t descr;
KOKKOS_CUSPARSE_SAFE_CALL(
cusparseCreateDnMat(&descr, rows, cols, ld, values, valueType, order));
KOKKOS_CUSPARSE_SAFE_CALL(cusparseCreateDnMat(
&descr, static_cast<int64_t>(rows), static_cast<int64_t>(cols),
static_cast<int64_t>(ld), values, valueType, order));

return descr;
}
Expand Down Expand Up @@ -143,6 +153,9 @@ void spmv_mv_cusparse(const Kokkos::Cuda &exec, Handle *handle,
constexpr bool xIsLR =
std::is_same<typename XVector::array_layout, Kokkos::LayoutRight>::value;
static_assert(xIsLL || xIsLR, "X multivector was not LL or LR (TPL error)");
static_assert(
std::is_same_v<typename YVector::array_layout, Kokkos::LayoutLeft>,
"Y multivector was not LL (TPL error)");
cusparseDnMatDescr_t vecX = make_cusparse_dn_mat_descr_t(x);
cusparseDnMatDescr_t vecY = make_cusparse_dn_mat_descr_t(y);
cusparseOperation_t opB =
Expand Down
85 changes: 60 additions & 25 deletions sparse/unit_test/Test_Sparse_spmv.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -518,13 +518,13 @@ void test_spmv_mv(lno_t numRows, size_type nnz, lno_t bandwidth,
using handle_t =
KokkosSparse::SPMVHandle<Device, crsMat_t, ViewTypeX, ViewTypeY>;

ViewTypeX b_x("A", numRows, numMV);
ViewTypeY b_y("B", numCols, numMV);
ViewTypeY b_y_copy("B", numCols, numMV);
ViewTypeX b_x("A", numCols, numMV);
ViewTypeY b_y("B", numRows, numMV);
ViewTypeY b_y_copy("B", numRows, numMV);

ViewTypeX b_xt("A", numCols, numMV);
ViewTypeY b_yt("B", numRows, numMV);
ViewTypeY b_yt_copy("B", numRows, numMV);
ViewTypeX b_xt("A", numRows, numMV);
ViewTypeY b_yt("B", numCols, numMV);
ViewTypeY b_yt_copy("B", numCols, numMV);

Kokkos::Random_XorShift64_Pool<typename Device::execution_space> rand_pool(
13718);
Expand Down Expand Up @@ -582,9 +582,9 @@ void test_spmv_mv(lno_t numRows, size_type nnz, lno_t bandwidth,
}

template <typename scalar_t, typename lno_t, typename size_type,
typename layout, class Device>
void test_spmv_mv_heavy(lno_t numRows, size_type nnz, lno_t bandwidth,
lno_t row_size_variance, int numMV) {
typename layout_x, typename layout_y, class Device>
void test_spmv_mv_heavy(lno_t numRows, lno_t numCols, size_type nnz,
lno_t bandwidth, lno_t row_size_variance, int numMV) {
#if defined(KOKKOSKERNELS_ENABLE_TPL_ARMPL) || defined(KOKKOS_ARCH_A64FX)
if (std::is_same<scalar_t, Kokkos::complex<double>>::value) {
std::cerr
Expand All @@ -596,8 +596,8 @@ void test_spmv_mv_heavy(lno_t numRows, size_type nnz, lno_t bandwidth,
#endif // KOKKOSKERNELS_ENABLE_TPL_ARMPL || KOKKOS_ARCH_A64FX
using crsMat_t = typename KokkosSparse::CrsMatrix<scalar_t, lno_t, Device,
void, size_type>;
using ViewTypeX = Kokkos::View<scalar_t **, layout, Device>;
using ViewTypeY = Kokkos::View<scalar_t **, layout, Device>;
using ViewTypeX = Kokkos::View<scalar_t **, layout_x, Device>;
using ViewTypeY = Kokkos::View<scalar_t **, layout_y, Device>;
using mag_t = typename Kokkos::ArithTraits<scalar_t>::mag_type;
using handle_t =
KokkosSparse::SPMVHandle<Device, crsMat_t, ViewTypeX, ViewTypeY>;
Expand All @@ -607,23 +607,30 @@ void test_spmv_mv_heavy(lno_t numRows, size_type nnz, lno_t bandwidth,
constexpr mag_t max_val = static_cast<mag_t>(10);

crsMat_t input_mat = KokkosSparse::Impl::kk_generate_sparse_matrix<crsMat_t>(
numRows, numRows, nnz, row_size_variance, bandwidth);
numRows, numCols, nnz, row_size_variance, bandwidth);
Kokkos::Random_XorShift64_Pool<typename Device::execution_space> rand_pool(
13718);

const lno_t max_nnz_per_row =
numRows ? (nnz / numRows + row_size_variance) : 0;

for (int nv = 1; nv <= numMV; nv++) {
ViewTypeX b_x("A", numRows, nv);
ViewTypeX b_x("A", numCols, nv);
ViewTypeY b_y("B", numRows, nv);
ViewTypeY b_y_copy("B", numRows, nv);

ViewTypeX b_xt("A", numRows, nv);
ViewTypeY b_yt("B", numCols, nv);
ViewTypeY b_yt_copy("B", numCols, nv);

Kokkos::fill_random(b_x, rand_pool, scalar_t(10));
Kokkos::fill_random(b_y, rand_pool, scalar_t(10));
Kokkos::fill_random(b_xt, rand_pool, scalar_t(10));
Kokkos::fill_random(b_yt, rand_pool, scalar_t(10));
Kokkos::fill_random(input_mat.values, rand_pool, scalar_t(10));

Kokkos::deep_copy(b_y_copy, b_y);
Kokkos::deep_copy(b_yt_copy, b_yt);

handle_t handle;

Expand All @@ -633,9 +640,9 @@ void test_spmv_mv_heavy(lno_t numRows, size_type nnz, lno_t bandwidth,
"N", max_y);
Test::check_spmv_mv(&handle, input_mat, b_x, b_y, b_y_copy, 1.0, 1.0, nv,
"N", max_y + max_nnz_per_row * max_val * max_x);
Test::check_spmv_mv(&handle, input_mat, b_x, b_y, b_y_copy, 1.0, 0.0, nv,
Test::check_spmv_mv(&handle, input_mat, b_xt, b_yt, b_yt_copy, 1.0, 0.0, nv,
"T", max_nnz_per_row * max_val * max_x);
Test::check_spmv_mv(&handle, input_mat, b_x, b_y, b_y_copy, 0.0, 1.0, nv,
Test::check_spmv_mv(&handle, input_mat, b_xt, b_yt, b_yt_copy, 0.0, 1.0, nv,
"T", max_y);
// Testing all modes together. The matrix may be non-square, so "N"/"C" use
// b_x/b_y while the transposed modes use b_xt/b_yt.
std::vector<const char *> modes = {"N", "C", "T", "H"};
Expand All @@ -645,8 +652,13 @@ void test_spmv_mv_heavy(lno_t numRows, size_type nnz, lno_t bandwidth,
for (double beta : testAlphaBeta) {
mag_t max_error =
beta * max_y + alpha * max_nnz_per_row * max_val * max_x;
Test::check_spmv_mv(&handle, input_mat, b_x, b_y, b_y_copy, alpha,
beta, nv, mode, max_error);
if (*mode == 'N' || *mode == 'C') {
Test::check_spmv_mv(&handle, input_mat, b_x, b_y, b_y_copy, alpha,
beta, nv, mode, max_error);
} else {
Test::check_spmv_mv(&handle, input_mat, b_xt, b_yt, b_yt_copy,
alpha, beta, nv, mode, max_error);
}
}
}
}
Expand Down Expand Up @@ -1189,19 +1201,30 @@ void test_spmv_all_interfaces_light() {
TestCategory, \
sparse##_##spmv_mv##_##SCALAR##_##ORDINAL##_##OFFSET##_##LAYOUT##_##DEVICE) { \
test_spmv_mv<SCALAR, ORDINAL, OFFSET, Kokkos::LAYOUT, DEVICE>( \
1000, 1000 * 3, 200, 10, true, 1); \
1001, 1001 * 3, 200, 10, true, 1); \
test_spmv_mv<SCALAR, ORDINAL, OFFSET, Kokkos::LAYOUT, DEVICE>( \
1000, 1000 * 3, 100, 10, true, 5); \
999, 999 * 3, 100, 10, true, 5); \
test_spmv_mv<SCALAR, ORDINAL, OFFSET, Kokkos::LAYOUT, DEVICE>( \
1000, 1000 * 2, 100, 5, true, 10); \
1003, 1003 * 2, 100, 5, true, 10); \
test_spmv_mv<SCALAR, ORDINAL, OFFSET, Kokkos::LAYOUT, DEVICE>( \
50000, 50000 * 3, 20, 10, false, 1); \
50007, 50007 * 3, 20, 10, false, 1); \
test_spmv_mv<SCALAR, ORDINAL, OFFSET, Kokkos::LAYOUT, DEVICE>( \
50000, 50000 * 3, 100, 10, false, 1); \
50002, 50002 * 3, 100, 10, false, 1); \
test_spmv_mv<SCALAR, ORDINAL, OFFSET, Kokkos::LAYOUT, DEVICE>( \
10000, 10000 * 2, 100, 5, false, 5); \
test_spmv_mv_heavy<SCALAR, ORDINAL, OFFSET, Kokkos::LAYOUT, DEVICE>( \
200, 200 * 10, 60, 4, 30); \
test_spmv_mv_heavy<SCALAR, ORDINAL, OFFSET, Kokkos::LAYOUT, \
Kokkos::LAYOUT, DEVICE>(204, 201, 204 * 10, 60, 4, 30); \
test_spmv_mv_heavy<SCALAR, ORDINAL, OFFSET, Kokkos::LAYOUT, \
Kokkos::LAYOUT, DEVICE>(2, 3, 5, 3, 1, 10); \
}

// Registers a GTest case that runs test_spmv_mv_heavy with a LayoutRight X
// multivector and a LayoutLeft Y multivector. The call arguments are, in
// order: numRows = 99, numCols = 101 (non-square), nnz = 100 * 15,
// bandwidth = 40, row_size_variance = 4, numMV = 20.
//
// Both layouts are fixed here, so this macro has no LAYOUT parameter and the
// generated test name is built only from SCALAR, ORDINAL, OFFSET and DEVICE.
// (Pasting an undeclared LAYOUT token would embed the literal text "LAYOUT"
// in the test name.)
#define EXECUTE_TEST_MV_MIXED_LAYOUT(SCALAR, ORDINAL, OFFSET, DEVICE) \
  TEST_F( \
      TestCategory, \
      sparse##_##spmv_mv_mixed_layout##_##SCALAR##_##ORDINAL##_##OFFSET##_##DEVICE) { \
    test_spmv_mv_heavy<SCALAR, ORDINAL, OFFSET, Kokkos::LayoutRight, \
                       Kokkos::LayoutLeft, DEVICE>(99, 101, 100 * 15, 40, 4, \
                                                   20); \
  }

#define EXECUTE_TEST_STRUCT(SCALAR, ORDINAL, OFFSET, DEVICE) \
Expand Down Expand Up @@ -1268,8 +1291,20 @@ EXECUTE_TEST_ISSUE_101(TestDevice)
#include <Test_Common_Test_All_Type_Combos.hpp>

#undef KOKKOSKERNELS_EXECUTE_TEST
#endif

#endif // defined(KOKKOSKERNELS_INST_LAYOUTRIGHT)
// Test that requires mixing LayoutLeft and LayoutRight (never an ETI'd
// combination)
// It is compiled only when neither KOKKOSKERNELS_ETI_ONLY nor
// KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS is defined (presumably because those
// modes do not allow non-ETI'd instantiations -- confirm against the build
// configuration).
#if (!defined(KOKKOSKERNELS_ETI_ONLY) && \
!defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS))

// Per-type-combination hook, presumably expanded by the header included
// below. NOTE: the DEVICE argument is ignored here; TestDevice is always
// forwarded to EXECUTE_TEST_MV_MIXED_LAYOUT.
#define KOKKOSKERNELS_EXECUTE_TEST(SCALAR, ORDINAL, OFFSET, DEVICE) \
EXECUTE_TEST_MV_MIXED_LAYOUT(SCALAR, ORDINAL, OFFSET, TestDevice)

#include <Test_Common_Test_All_Type_Combos.hpp>

// Remove the hook so a later section can define its own.
#undef KOKKOSKERNELS_EXECUTE_TEST
#endif

#undef EXECUTE_TEST_FN
#undef EXECUTE_TEST_STRUCT
Expand Down

0 comments on commit ccc4be5

Please sign in to comment.