From 4b2001ed11bf9c51c5c1031bd486b16fa59a160e Mon Sep 17 00:00:00 2001 From: Jiaming Yuan Date: Sat, 7 Dec 2024 20:25:28 +0800 Subject: [PATCH] Use static dim check for the array interface handler. (#11069) --- .../xgboost4j/src/native/xgboost4j-gpu.cu | 4 +- .../xgboost4j/src/native/xgboost4j.cpp | 10 ++-- src/c_api/c_api.cc | 8 ++-- src/c_api/c_api.cu | 4 +- src/data/adapter.h | 48 +++++++++---------- src/data/array_interface.h | 12 ++++- src/data/data.cu | 16 +++---- src/data/device_adapter.cuh | 34 +++++++------ tests/cpp/data/test_array_interface.cc | 8 ++-- 9 files changed, 75 insertions(+), 69 deletions(-) diff --git a/jvm-packages/xgboost4j/src/native/xgboost4j-gpu.cu b/jvm-packages/xgboost4j/src/native/xgboost4j-gpu.cu index 524e5984803d..a9798465686f 100644 --- a/jvm-packages/xgboost4j/src/native/xgboost4j-gpu.cu +++ b/jvm-packages/xgboost4j/src/native/xgboost4j-gpu.cu @@ -97,7 +97,7 @@ void CopyInterface(std::vector> &interface_arr, Json{Boolean{false}}}; out["data"] = Array(std::move(j_data)); - out["shape"] = Array(std::vector{Json(Integer(interface.Shape(0)))}); + out["shape"] = Array(std::vector{Json(Integer(interface.Shape<0>()))}); if (interface.valid.Data()) { CopyColumnMask(interface, columns, kind, c, &mask, &out, stream); @@ -113,7 +113,7 @@ void CopyMetaInfo(Json *p_interface, dh::device_vector *out, cudaStream_t str CHECK_EQ(get(j_interface).size(), 1); auto object = get(get(j_interface)[0]); ArrayInterface<1> interface(object); - out->resize(interface.Shape(0)); + out->resize(interface.Shape<0>()); size_t element_size = interface.ElementSize(); size_t size = element_size * interface.n; dh::safe_cuda(cudaMemcpyAsync(RawPtr(*out), interface.data, size, diff --git a/jvm-packages/xgboost4j/src/native/xgboost4j.cpp b/jvm-packages/xgboost4j/src/native/xgboost4j.cpp index 3e5087a78f7d..01706beb6b45 100644 --- a/jvm-packages/xgboost4j/src/native/xgboost4j.cpp +++ b/jvm-packages/xgboost4j/src/native/xgboost4j.cpp @@ -1520,20 +1520,20 @@ JNIEXPORT jint JNICALL Java_ml_dmlc_xgboost4j_java_XGBoostJNI_XGDMatrixGetQuanti ArrayInterface<1> indptr{StringView{str_indptr}}; ArrayInterface<1> data{StringView{str_data}}; - CHECK_GE(indptr.Shape(0), 2); + CHECK_GE(indptr.Shape<0>(), 2); // Cut ptr - auto j_indptr_array = jenv->NewLongArray(indptr.Shape(0)); + auto j_indptr_array = jenv->NewLongArray(indptr.Shape<0>()); CHECK_EQ(indptr.type, ArrayInterfaceHandler::Type::kU8); - CHECK_LT(indptr(indptr.Shape(0) - 1), + CHECK_LT(indptr(indptr.Shape<0>() - 1), static_cast(std::numeric_limits::max())); static_assert(sizeof(jlong) == sizeof(std::uint64_t)); - jenv->SetLongArrayRegion(j_indptr_array, 0, indptr.Shape(0), + jenv->SetLongArrayRegion(j_indptr_array, 0, indptr.Shape<0>(), static_cast(indptr.data)); jenv->SetObjectArrayElement(j_indptr, 0, j_indptr_array); // Cut values - auto n_cuts = indptr(indptr.Shape(0) - 1); + auto n_cuts = indptr(indptr.Shape<0>() - 1); jfloatArray jcuts_array = jenv->NewFloatArray(n_cuts); CHECK_EQ(data.type, ArrayInterfaceHandler::Type::kF4); jenv->SetFloatArrayRegion(jcuts_array, 0, n_cuts, static_cast(data.data)); diff --git a/src/c_api/c_api.cc b/src/c_api/c_api.cc index ee99922cdd1c..d3e11d2f894c 100644 --- a/src/c_api/c_api.cc +++ b/src/c_api/c_api.cc @@ -1098,18 +1098,18 @@ XGB_DLL int XGBoosterTrainOneIter(BoosterHandle handle, DMatrixHandle dtrain, in ArrayInterface<2, false> i_grad{StringView{grad}}; ArrayInterface<2, false> i_hess{StringView{hess}}; StringView msg{"Mismatched shape between the gradient and hessian."}; - CHECK_EQ(i_grad.Shape(0), i_hess.Shape(0)) << msg; - CHECK_EQ(i_grad.Shape(1), i_hess.Shape(1)) << msg; + CHECK_EQ(i_grad.Shape<0>(), i_hess.Shape<0>()) << msg; + CHECK_EQ(i_grad.Shape<1>(), i_hess.Shape<1>()) << msg; linalg::Matrix gpair; auto grad_is_cuda = ArrayInterfaceHandler::IsCudaPtr(i_grad.data); auto hess_is_cuda = ArrayInterfaceHandler::IsCudaPtr(i_hess.data); - CHECK_EQ(i_grad.Shape(0), p_fmat->Info().num_row_) + CHECK_EQ(i_grad.Shape<0>(), p_fmat->Info().num_row_) << "Mismatched size between the gradient and training data."; CHECK_EQ(grad_is_cuda, hess_is_cuda) << "gradient and hessian should be on the same device."; auto *learner = static_cast(handle); auto ctx = learner->Ctx(); if (!grad_is_cuda) { - gpair.Reshape(i_grad.Shape(0), i_grad.Shape(1)); + gpair.Reshape(i_grad.Shape<0>(), i_grad.Shape<1>()); auto h_gpair = gpair.HostView(); DispatchDType(i_grad, DeviceOrd::CPU(), [&](auto &&t_grad) { DispatchDType(i_hess, DeviceOrd::CPU(), [&](auto &&t_hess) { diff --git a/src/c_api/c_api.cu b/src/c_api/c_api.cu index 47868f466473..c9ff16dea120 100644 --- a/src/c_api/c_api.cu +++ b/src/c_api/c_api.cu @@ -1,5 +1,5 @@ /** - * Copyright 2019-2023, XGBoost Contributors + * Copyright 2019-2024, XGBoost Contributors */ #include // for transform @@ -78,7 +78,7 @@ void CopyGradientFromCUDAArrays(Context const *ctx, ArrayInterface<2, false> con CHECK_EQ(grad_dev, hess_dev) << "gradient and hessian should be on the same device."; auto &gpair = *out_gpair; gpair.SetDevice(DeviceOrd::CUDA(grad_dev)); - gpair.Reshape(grad.Shape(0), grad.Shape(1)); + gpair.Reshape(grad.Shape<0>(), grad.Shape<1>()); auto d_gpair = gpair.View(DeviceOrd::CUDA(grad_dev)); auto cuctx = ctx->CUDACtx(); diff --git a/src/data/adapter.h b/src/data/adapter.h index 0ad1e9e3864c..1467d3376886 100644 --- a/src/data/adapter.h +++ b/src/data/adapter.h @@ -1,22 +1,22 @@ /** - * Copyright 2019-2023, XGBoost Contributors + * Copyright 2019-2024, XGBoost Contributors * \file adapter.h */ #ifndef XGBOOST_DATA_ADAPTER_H_ #define XGBOOST_DATA_ADAPTER_H_ #include -#include -#include // for size_t -#include -#include -#include -#include -#include -#include // std::move -#include - -#include "../common/error_msg.h" // for MaxFeatureSize +#include // for transform, all_of +#include // for isfinite +#include // for size_t +#include // for uint8_t +#include // for back_inserter +#include // for numeric_limits +#include // for unique_ptr, make_unique +#include // for string +#include // for move +#include // for vector + #include "../common/math.h" #include "array_interface.h" #include "xgboost/base.h" @@ -256,7 +256,7 @@ class ArrayAdapterBatch : public detail::NoMetaInfo { Line(ArrayInterface<2> array_interface, size_t ridx) : array_interface_{std::move(array_interface)}, ridx_{ridx} {} - size_t Size() const { return array_interface_.Shape(1); } + size_t Size() const { return array_interface_.Shape<1>(); } COOTuple GetElement(size_t idx) const { return {ridx_, idx, array_interface_(ridx_, idx)}; @@ -269,8 +269,8 @@ class ArrayAdapterBatch : public detail::NoMetaInfo { return Line{array_interface_, idx}; } - [[nodiscard]] std::size_t NumRows() const { return array_interface_.Shape(0); } - [[nodiscard]] std::size_t NumCols() const { return array_interface_.Shape(1); } + [[nodiscard]] std::size_t NumRows() const { return array_interface_.Shape<0>(); } + [[nodiscard]] std::size_t NumCols() const { return array_interface_.Shape<1>(); } [[nodiscard]] std::size_t Size() const { return this->NumRows(); } explicit ArrayAdapterBatch(ArrayInterface<2> array_interface) @@ -290,8 +290,8 @@ class ArrayAdapter : public detail::SingleBatchDataIter { batch_ = ArrayAdapterBatch{array_interface_}; } [[nodiscard]] ArrayAdapterBatch const& Value() const override { return batch_; } - [[nodiscard]] std::size_t NumRows() const { return array_interface_.Shape(0); } - [[nodiscard]] std::size_t NumColumns() const { return array_interface_.Shape(1); } + [[nodiscard]] std::size_t NumRows() const { return array_interface_.Shape<0>(); } + [[nodiscard]] std::size_t NumColumns() const { return array_interface_.Shape<1>(); } private: ArrayAdapterBatch batch_; @@ -321,7 +321,7 @@ class CSRArrayAdapterBatch : public detail::NoMetaInfo { } [[nodiscard]] std::size_t Size() const { - return values_.Shape(0); + return values_.Shape<0>(); } }; @@ -339,7 +339,7 @@ class CSRArrayAdapterBatch : public detail::NoMetaInfo { } size_t NumRows() const { - size_t size = indptr_.Shape(0); + size_t size = indptr_.Shape<0>(); size = size == 0 ? 0 : size - 1; return size; } @@ -381,9 +381,9 @@ class CSRArrayAdapter : public detail::SingleBatchDataIter return batch_; } size_t NumRows() const { - size_t size = indptr_.Shape(0); + size_t size = indptr_.Shape<0>(); size = size == 0 ? 0 : size - 1; - return size; + return size; } size_t NumColumns() const { return num_cols_; } @@ -479,7 +479,7 @@ class CSCArrayAdapterBatch : public detail::NoMetaInfo { values_{std::move(values)}, offset_{offset} {} - std::size_t Size() const { return values_.Shape(0); } + std::size_t Size() const { return values_.Shape<0>(); } COOTuple GetElement(std::size_t idx) const { return {TypedIndex{row_idx_}(offset_ + idx), column_idx_, values_(offset_ + idx)}; @@ -684,7 +684,7 @@ class ColumnarAdapterBatch : public detail::NoMetaInfo { : columns_{columns} {} [[nodiscard]] Line GetLine(std::size_t ridx) const { return Line{columns_, ridx}; } [[nodiscard]] std::size_t Size() const { - return columns_.empty() ? 0 : columns_.front().Shape(0); + return columns_.empty() ? 0 : columns_.front().Shape<0>(); } [[nodiscard]] std::size_t NumCols() const { return columns_.empty() ? 0 : columns_.size(); } [[nodiscard]] std::size_t NumRows() const { return this->Size(); } @@ -707,7 +707,7 @@ class ColumnarAdapter : public detail::SingleBatchDataIter bool consistent = columns_.empty() || std::all_of(columns_.cbegin(), columns_.cend(), [&](ArrayInterface<1, false> const& array) { - return array.Shape(0) == columns_[0].Shape(0); + return array.Shape<0>() == columns_[0].Shape<0>(); }); CHECK(consistent) << "Size of columns should be the same."; batch_ = ColumnarAdapterBatch{columns_}; diff --git a/src/data/array_interface.h b/src/data/array_interface.h index 93fb55dd5626..35056b74f3aa 100644 --- a/src/data/array_interface.h +++ b/src/data/array_interface.h @@ -501,8 +501,16 @@ class ArrayInterface { } } - [[nodiscard]] XGBOOST_DEVICE std::size_t Shape(size_t i) const { return shape[i]; } - [[nodiscard]] XGBOOST_DEVICE std::size_t Stride(size_t i) const { return strides[i]; } + template + [[nodiscard]] XGBOOST_DEVICE std::size_t Shape() const { + static_assert(i < D); + return shape[i]; + } + template + [[nodiscard]] XGBOOST_DEVICE std::size_t Stride() const { + static_assert(i < D); + return strides[i]; + } template XGBOOST_HOST_DEV_INLINE decltype(auto) DispatchCall(Fn func) const { diff --git a/src/data/data.cu b/src/data/data.cu index 73717aa79700..17fc54a562a4 100644 --- a/src/data/data.cu +++ b/src/data/data.cu @@ -69,12 +69,12 @@ void CopyGroupInfoImpl(ArrayInterface<1> column, std::vector* out) auto ptr_device = SetDeviceToPtr(column.data); CHECK_EQ(ptr_device, dh::CurrentDevice()); - dh::TemporaryArray temp(column.Shape(0)); + dh::TemporaryArray temp(column.Shape<0>()); auto d_tmp = temp.data().get(); - dh::LaunchN(column.Shape(0), + dh::LaunchN(column.Shape<0>(), [=] __device__(size_t idx) { d_tmp[idx] = TypedIndex{column}(idx); }); - auto length = column.Shape(0); + auto length = column.Shape<0>(); out->resize(length + 1); out->at(0) = 0; thrust::copy(temp.data(), temp.data() + length, out->begin() + 1); @@ -93,7 +93,7 @@ void CopyQidImpl(Context const* ctx, ArrayInterface<1> array_interface, auto d = DeviceOrd::CUDA(SetDeviceToPtr(array_interface.data)); auto cuctx = ctx->CUDACtx(); dh::LaunchN(1, cuctx->Stream(), [=] __device__(size_t) { d_flag[0] = true; }); - dh::LaunchN(array_interface.Shape(0) - 1, cuctx->Stream(), [=] __device__(size_t i) { + dh::LaunchN(array_interface.Shape<0>() - 1, cuctx->Stream(), [=] __device__(size_t i) { auto typed = TypedIndex{array_interface}; if (typed(i) > typed(i + 1)) { d_flag[0] = false; @@ -104,15 +104,15 @@ void CopyQidImpl(Context const* ctx, ArrayInterface<1> array_interface, cudaMemcpyDeviceToHost)); CHECK(non_dec) << "`qid` must be sorted in increasing order along with data."; size_t bytes = 0; - dh::caching_device_vector out(array_interface.Shape(0)); - dh::caching_device_vector cnt(array_interface.Shape(0)); + dh::caching_device_vector out(array_interface.Shape<0>()); + dh::caching_device_vector cnt(array_interface.Shape<0>()); HostDeviceVector d_num_runs_out(1, 0, d); cub::DeviceRunLengthEncode::Encode(nullptr, bytes, it, out.begin(), cnt.begin(), - d_num_runs_out.DevicePointer(), array_interface.Shape(0), + d_num_runs_out.DevicePointer(), array_interface.Shape<0>(), cuctx->Stream()); dh::CachingDeviceUVector tmp(bytes); cub::DeviceRunLengthEncode::Encode(tmp.data(), bytes, it, out.begin(), cnt.begin(), - d_num_runs_out.DevicePointer(), array_interface.Shape(0), + d_num_runs_out.DevicePointer(), array_interface.Shape<0>(), cuctx->Stream()); auto h_num_runs_out = d_num_runs_out.HostSpan()[0]; diff --git a/src/data/device_adapter.cuh b/src/data/device_adapter.cuh index 9089c361ea23..cad3cffbc58a 100644 --- a/src/data/device_adapter.cuh +++ b/src/data/device_adapter.cuh @@ -16,9 +16,7 @@ #include "adapter.h" #include "array_interface.h" -namespace xgboost { -namespace data { - +namespace xgboost::data { class CudfAdapterBatch : public detail::NoMetaInfo { friend class CudfAdapter; @@ -114,7 +112,7 @@ class CudfAdapter : public detail::SingleBatchDataIter { CHECK_EQ(typestr.size(), 3) << ArrayInterfaceErrors::TypestrFormat(); std::vector> columns; auto first_column = ArrayInterface<1>(get(json_columns[0])); - num_rows_ = first_column.Shape(0); + num_rows_ = first_column.Shape<0>(); if (num_rows_ == 0) { return; } @@ -124,12 +122,12 @@ class CudfAdapter : public detail::SingleBatchDataIter { dh::safe_cuda(cudaSetDevice(device_.ordinal)); for (auto& json_col : json_columns) { auto column = ArrayInterface<1>(get(json_col)); - n_bytes_ += column.ElementSize() * column.Shape(0); + n_bytes_ += column.ElementSize() * column.Shape<0>(); columns.push_back(column); - num_rows_ = std::max(num_rows_, column.Shape(0)); + num_rows_ = std::max(num_rows_, column.Shape<0>()); CHECK_EQ(device_.ordinal, dh::CudaGetPointerDevice(column.data)) << "All columns should use the same device."; - CHECK_EQ(num_rows_, column.Shape(0)) + CHECK_EQ(num_rows_, column.Shape<0>()) << "All columns should have same number of rows."; } columns_ = columns; @@ -161,12 +159,13 @@ class CupyAdapterBatch : public detail::NoMetaInfo { CupyAdapterBatch() = default; explicit CupyAdapterBatch(ArrayInterface<2> array_interface) : array_interface_(std::move(array_interface)) {} + // The total number of elements. [[nodiscard]] std::size_t Size() const { - return array_interface_.Shape(0) * array_interface_.Shape(1); + return array_interface_.Shape<0>() * array_interface_.Shape<1>(); } [[nodiscard]]__device__ COOTuple GetElement(size_t idx) const { - size_t column_idx = idx % array_interface_.Shape(1); - size_t row_idx = idx / array_interface_.Shape(1); + size_t column_idx = idx % array_interface_.Shape<1>(); + size_t row_idx = idx / array_interface_.Shape<1>(); float value = array_interface_(row_idx, column_idx); return {row_idx, column_idx, value}; } @@ -175,8 +174,8 @@ class CupyAdapterBatch : public detail::NoMetaInfo { return value; } - [[nodiscard]] XGBOOST_DEVICE bst_idx_t NumRows() const { return array_interface_.Shape(0); } - [[nodiscard]] XGBOOST_DEVICE bst_idx_t NumCols() const { return array_interface_.Shape(1); } + [[nodiscard]] XGBOOST_DEVICE bst_idx_t NumRows() const { return array_interface_.Shape<0>(); } + [[nodiscard]] XGBOOST_DEVICE bst_idx_t NumCols() const { return array_interface_.Shape<1>(); } private: ArrayInterface<2> array_interface_; @@ -188,20 +187,20 @@ class CupyAdapter : public detail::SingleBatchDataIter { Json json_array_interface = Json::Load(cuda_interface_str); array_interface_ = ArrayInterface<2>(get(json_array_interface)); batch_ = CupyAdapterBatch(array_interface_); - if (array_interface_.Shape(0) == 0) { + if (array_interface_.Shape<0>() == 0) { return; } device_ = DeviceOrd::CUDA(dh::CudaGetPointerDevice(array_interface_.data)); this->n_bytes_ = - array_interface_.Shape(0) * array_interface_.Shape(1) * array_interface_.ElementSize(); + array_interface_.Shape<0>() * array_interface_.Shape<1>() * array_interface_.ElementSize(); CHECK(device_.IsCUDA()); } explicit CupyAdapter(std::string cuda_interface_str) : CupyAdapter{StringView{cuda_interface_str}} {} [[nodiscard]] const CupyAdapterBatch& Value() const override { return batch_; } - [[nodiscard]] std::size_t NumRows() const { return array_interface_.Shape(0); } - [[nodiscard]] std::size_t NumColumns() const { return array_interface_.Shape(1); } + [[nodiscard]] std::size_t NumRows() const { return array_interface_.Shape<0>(); } + [[nodiscard]] std::size_t NumColumns() const { return array_interface_.Shape<1>(); } [[nodiscard]] DeviceOrd Device() const { return device_; } [[nodiscard]] bst_idx_t SizeBytes() const { return this->n_bytes_; } @@ -279,6 +278,5 @@ bool NoInfInData(Context const* ctx, AdapterBatchT const& batch, IsValidFunctor thrust::logical_and<>{}); return valid; } -}; // namespace data -} // namespace xgboost +} // namespace xgboost::data #endif // XGBOOST_DATA_DEVICE_ADAPTER_H_ diff --git a/tests/cpp/data/test_array_interface.cc b/tests/cpp/data/test_array_interface.cc index b692a2aa5378..f87932e77749 100644 --- a/tests/cpp/data/test_array_interface.cc +++ b/tests/cpp/data/test_array_interface.cc @@ -14,8 +14,8 @@ TEST(ArrayInterface, Initialize) { HostDeviceVector storage; auto array = RandomDataGenerator{kRows, kCols, 0}.GenerateArrayInterface(&storage); auto arr_interface = ArrayInterface<2>(StringView{array}); - ASSERT_EQ(arr_interface.Shape(0), kRows); - ASSERT_EQ(arr_interface.Shape(1), kCols); + ASSERT_EQ(arr_interface.Shape<0>(), kRows); + ASSERT_EQ(arr_interface.Shape<1>(), kCols); ASSERT_EQ(arr_interface.data, storage.ConstHostPointer()); ASSERT_EQ(arr_interface.ElementSize(), 4); ASSERT_EQ(arr_interface.type, ArrayInterfaceHandler::kF4); @@ -106,7 +106,7 @@ TEST(ArrayInterface, TrivialDim) { { ArrayInterface<1> arr_i{interface_str}; ASSERT_EQ(arr_i.n, kRows); - ASSERT_EQ(arr_i.Shape(0), kRows); + ASSERT_EQ(arr_i.Shape<0>(), kRows); } std::swap(kRows, kCols); @@ -114,7 +114,7 @@ TEST(ArrayInterface, TrivialDim) { { ArrayInterface<1> arr_i{interface_str}; ASSERT_EQ(arr_i.n, kCols); - ASSERT_EQ(arr_i.Shape(0), kCols); + ASSERT_EQ(arr_i.Shape<0>(), kCols); } }