From 6c0e235fdafd267a493eb548722921353e8aba6a Mon Sep 17 00:00:00 2001
From: Jiaming Yuan
Date: Wed, 13 Mar 2024 19:06:44 +0800
Subject: [PATCH] Fix.

---
 src/collective/device_communicator_adapter.cuh  |  2 ++
 src/common/device_helpers.cuh                   |  3 ---
 src/common/hist_util.cu                         |  4 +---
 src/common/hist_util.cuh                        |  2 +-
 src/common/host_device_vector.cc                |  2 +-
 src/common/host_device_vector.cu                |  2 +-
 src/common/quantile.cu                          | 10 +++++-----
 src/data/data.cc                                |  2 +-
 src/data/device_adapter.cuh                     |  6 +++---
 src/data/simple_dmatrix.cuh                     |  2 +-
 src/predictor/gpu_predictor.cu                  | 14 +++++++-------
 src/predictor/predictor.cc                      |  2 +-
 tests/cpp/common/test_hist_util.cu              |  8 ++++----
 tests/cpp/common/test_quantile.cu               | 14 +++++++-------
 tests/cpp/common/test_span.cu                   | 11 ++++++-----
 tests/cpp/data/test_device_adapter.cu           |  2 +-
 tests/cpp/plugin/test_sycl_predictor.cc         |  4 ++--
 tests/cpp/tree/gpu_hist/test_evaluate_splits.cu |  8 ++++----
 18 files changed, 48 insertions(+), 50 deletions(-)

diff --git a/src/collective/device_communicator_adapter.cuh b/src/collective/device_communicator_adapter.cuh
index 7d3e836a0ec9..647c74b4e856 100644
--- a/src/collective/device_communicator_adapter.cuh
+++ b/src/collective/device_communicator_adapter.cuh
@@ -3,6 +3,8 @@
  */
 #pragma once

+#include <numeric>  // for accumulate
+
 #include "communicator.h"
 #include "device_communicator.cuh"

diff --git a/src/common/device_helpers.cuh b/src/common/device_helpers.cuh
index 46f76c41589d..026fbacf24be 100644
--- a/src/common/device_helpers.cuh
+++ b/src/common/device_helpers.cuh
@@ -19,11 +19,9 @@
 #include 
 #include 
-#include 
 #include <cstddef>  // for size_t
 #include 
-#include 
 #include 
 #include 
 #include 
@@ -31,7 +29,6 @@

 #include "../collective/communicator-inl.h"
 #include "common.h"
-#include "xgboost/global_config.h"
 #include "xgboost/host_device_vector.h"
 #include "xgboost/logging.h"
 #include "xgboost/span.h"
diff --git a/src/common/hist_util.cu b/src/common/hist_util.cu
index a2fda7017b8b..39f310ebb66a 100644
--- a/src/common/hist_util.cu
+++ b/src/common/hist_util.cu
@@ -13,8 +13,6 @@
 #include 
 #include <cstddef>  // for size_t
-#include 
-#include 
 #include 
 #include 

@@ -123,7 +121,7 @@ void SortByWeight(dh::device_vector* weights, dh::device_vector* s
                    [=] __device__(const Entry& a, const Entry& b) { return a.index == b.index; });
 }

-void RemoveDuplicatedCategories(DeviceOrd device, MetaInfo const& info, Span<bst_row_t> d_cuts_ptr,
+void RemoveDuplicatedCategories(DeviceOrd device, MetaInfo const& info, Span<bst_idx_t> d_cuts_ptr,
                                 dh::device_vector* p_sorted_entries,
                                 dh::device_vector* p_sorted_weights,
                                 dh::caching_device_vector* p_column_sizes_scan) {
diff --git a/src/common/hist_util.cuh b/src/common/hist_util.cuh
index e899f2a1ce07..fe3771924043 100644
--- a/src/common/hist_util.cuh
+++ b/src/common/hist_util.cuh
@@ -240,7 +240,7 @@ void MakeEntriesFromAdapter(AdapterBatch const& batch, BatchIter batch_iter, Ran

 void SortByWeight(dh::device_vector* weights, dh::device_vector* sorted_entries);

-void RemoveDuplicatedCategories(DeviceOrd device, MetaInfo const& info, Span<bst_row_t> d_cuts_ptr,
+void RemoveDuplicatedCategories(DeviceOrd device, MetaInfo const& info, Span<bst_idx_t> d_cuts_ptr,
                                 dh::device_vector* p_sorted_entries,
                                 dh::device_vector* p_sorted_weights,
                                 dh::caching_device_vector* p_column_sizes_scan);
diff --git a/src/common/host_device_vector.cc b/src/common/host_device_vector.cc
index a7a996c6c1ff..f4973c0428f0 100644
--- a/src/common/host_device_vector.cc
+++ b/src/common/host_device_vector.cc
@@ -178,7 +178,7 @@ template class HostDeviceVector;
 template class HostDeviceVector;
 template class HostDeviceVector;
 template class HostDeviceVector;
-template class HostDeviceVector<uint64_t>;  // bst_row_t
+template class HostDeviceVector<bst_idx_t>;
 template class HostDeviceVector<uint32_t>;  // bst_feature_t

 #if defined(__APPLE__) || defined(__EMSCRIPTEN__)
diff --git a/src/common/host_device_vector.cu b/src/common/host_device_vector.cu
index 4933a4b11344..267309288522 100644
--- a/src/common/host_device_vector.cu
+++ b/src/common/host_device_vector.cu
@@ -412,7 +412,7 @@ template class HostDeviceVector;
 template class HostDeviceVector;
 template class HostDeviceVector;
 template class HostDeviceVector;
-template class HostDeviceVector<uint64_t>;  // bst_row_t
+template class HostDeviceVector<bst_idx_t>;
 template class HostDeviceVector<uint32_t>;  // bst_feature_t
 template class HostDeviceVector;
 template class HostDeviceVector;
diff --git a/src/common/quantile.cu b/src/common/quantile.cu
index b3dd770ab6a0..0a7d0b0785e8 100644
--- a/src/common/quantile.cu
+++ b/src/common/quantile.cu
@@ -114,9 +114,9 @@ void CopyTo(Span out, Span src) {

 // Compute the merge path.
 common::Span> MergePath(
-    Span const &d_x, Span<bst_row_t const> const &x_ptr,
-    Span const &d_y, Span<bst_row_t const> const &y_ptr,
-    Span out, Span<bst_row_t> out_ptr) {
+    Span const &d_x, Span<bst_idx_t const> const &x_ptr,
+    Span const &d_y, Span<bst_idx_t const> const &y_ptr,
+    Span out, Span<bst_idx_t> out_ptr) {
   auto x_merge_key_it = thrust::make_zip_iterator(thrust::make_tuple(
       dh::MakeTransformIterator(
           thrust::make_counting_iterator(0ul),
@@ -206,8 +206,8 @@ common::Span> MergePath(
 // run it in 2 passes to obtain the merge path and then customize the standard merge
 // algorithm.
 void MergeImpl(DeviceOrd device, Span const &d_x,
-               Span<bst_row_t const> const &x_ptr, Span const &d_y,
-               Span<bst_row_t const> const &y_ptr, Span out, Span<bst_row_t> out_ptr) {
+               Span<bst_idx_t const> const &x_ptr, Span const &d_y,
+               Span<bst_idx_t const> const &y_ptr, Span out, Span<bst_idx_t> out_ptr) {
   dh::safe_cuda(cudaSetDevice(device.ordinal));
   CHECK_EQ(d_x.size() + d_y.size(), out.size());
   CHECK_EQ(x_ptr.size(), out_ptr.size());
diff --git a/src/data/data.cc b/src/data/data.cc
index b6ecd4db74c2..8cdcde201c79 100644
--- a/src/data/data.cc
+++ b/src/data/data.cc
@@ -47,7 +47,7 @@
 #include "simple_dmatrix.h"              // for SimpleDMatrix
 #include "sparse_page_writer.h"          // for SparsePageFormatReg
 #include "validation.h"                  // for LabelsCheck, WeightsCheck, ValidateQueryGroup
-#include "xgboost/base.h"                // for bst_group_t, bst_row_t, bst_float, bst_ulong
+#include "xgboost/base.h"                // for bst_group_t, bst_idx_t, bst_float, bst_ulong
 #include "xgboost/context.h"             // for Context
 #include "xgboost/host_device_vector.h"  // for HostDeviceVector
 #include "xgboost/learner.h"             // for HostDeviceVector
diff --git a/src/data/device_adapter.cuh b/src/data/device_adapter.cuh
index 2dbc2b1e08ab..bc012fd9b439 100644
--- a/src/data/device_adapter.cuh
+++ b/src/data/device_adapter.cuh
@@ -208,8 +208,8 @@ class CupyAdapter : public detail::SingleBatchDataIter {

 // Returns maximum row length
 template <typename AdapterBatchT>
-std::size_t GetRowCounts(const AdapterBatchT batch, common::Span<bst_row_t> offset, DeviceOrd device,
-                         float missing) {
+bst_idx_t GetRowCounts(const AdapterBatchT batch, common::Span<bst_idx_t> offset, DeviceOrd device,
+                       float missing) {
   dh::safe_cuda(cudaSetDevice(device.ordinal));
   IsValidFunctor is_valid(missing);
   dh::safe_cuda(cudaMemsetAsync(offset.data(), '\0', offset.size_bytes()));
@@ -248,7 +248,7 @@
   bst_idx_t row_stride =
       dh::Reduce(thrust::cuda::par(alloc), thrust::device_pointer_cast(offset.data()),
                  thrust::device_pointer_cast(offset.data()) + offset.size(),
-                 static_cast(0), thrust::maximum());
+                 static_cast(0), thrust::maximum());
   return row_stride;
 }

diff --git a/src/data/simple_dmatrix.cuh b/src/data/simple_dmatrix.cuh
index 528bea8be80a..e3c241886007 100644
--- a/src/data/simple_dmatrix.cuh
+++ b/src/data/simple_dmatrix.cuh
@@ -40,7 +40,7 @@ void CopyDataToDMatrix(AdapterBatchT batch, common::Span data,
 }

 template <typename AdapterBatchT>
-void CountRowOffsets(const AdapterBatchT& batch, common::Span<bst_row_t> offset, DeviceOrd device,
+void CountRowOffsets(const AdapterBatchT& batch, common::Span<bst_idx_t> offset, DeviceOrd device,
                      float missing) {
   dh::safe_cuda(cudaSetDevice(device.ordinal));
   IsValidFunctor is_valid(missing);
diff --git a/src/predictor/gpu_predictor.cu b/src/predictor/gpu_predictor.cu
index 29fbae870270..aea1aa95deb1 100644
--- a/src/predictor/gpu_predictor.cu
+++ b/src/predictor/gpu_predictor.cu
@@ -67,12 +67,12 @@ struct TreeView {

 struct SparsePageView {
   common::Span d_data;
-  common::Span<const bst_row_t> d_row_ptr;
+  common::Span<const bst_idx_t> d_row_ptr;
   bst_feature_t num_features;

   SparsePageView() = default;
   XGBOOST_DEVICE SparsePageView(common::Span data,
-                                common::Span<const bst_row_t> row_ptr,
+                                common::Span<const bst_idx_t> row_ptr,
                                 bst_feature_t num_features)
       : d_data{data}, d_row_ptr{row_ptr}, num_features(num_features) {}
   [[nodiscard]] __device__ float GetElement(size_t ridx, size_t fidx) const {
@@ -664,7 +664,7 @@ __global__ void MaskBitVectorKernel(
   }
 }

-__device__ bst_node_t GetLeafIndexByBitVector(bst_row_t ridx, TreeView const& tree,
+__device__ bst_node_t GetLeafIndexByBitVector(bst_idx_t ridx, TreeView const& tree,
                                               BitVector const& decision_bits,
                                               BitVector const& missing_bits, std::size_t num_nodes,
                                               std::size_t tree_offset) {
@@ -682,7 +682,7 @@ __device__ bst_node_t GetLeafIndexByBitVector(bst_row_t ridx, TreeView const& tr
   return nidx;
 }

-__device__ float GetLeafWeightByBitVector(bst_row_t ridx, TreeView const& tree,
+__device__ float GetLeafWeightByBitVector(bst_idx_t ridx, TreeView const& tree,
                                           BitVector const& decision_bits,
                                           BitVector const& missing_bits, std::size_t num_nodes,
                                           std::size_t tree_offset) {
@@ -1171,7 +1171,7 @@ class GPUPredictor : public xgboost::Predictor {
     auto max_shared_memory_bytes = ConfigureDevice(ctx_->Device());

     const MetaInfo& info = p_fmat->Info();
-    bst_row_t num_rows = info.num_row_;
+    bst_idx_t num_rows = info.num_row_;
     if (tree_end == 0 || tree_end > model.trees.size()) {
       tree_end = static_cast(model.trees.size());
     }
@@ -1196,7 +1196,7 @@ class GPUPredictor : public xgboost::Predictor {
     for (auto const& batch : p_fmat->GetBatches()) {
       batch.data.SetDevice(ctx_->Device());
       batch.offset.SetDevice(ctx_->Device());
-      bst_row_t batch_offset = 0;
+      bst_idx_t batch_offset = 0;
       SparsePageView data{batch.data.DeviceSpan(), batch.offset.DeviceSpan(),
                           model.learner_model_param->num_feature};
       size_t num_rows = batch.Size();
@@ -1219,7 +1219,7 @@ class GPUPredictor : public xgboost::Predictor {
       }
     } else {
       for (auto const& batch : p_fmat->GetBatches(ctx_, BatchParam{})) {
-        bst_row_t batch_offset = 0;
+        bst_idx_t batch_offset = 0;
         EllpackDeviceAccessor data{batch.Impl()->GetDeviceAccessor(ctx_->Device())};
         size_t num_rows = batch.Size();
         auto grid =
diff --git a/src/predictor/predictor.cc b/src/predictor/predictor.cc
index 1b2e9137cc3b..2a6d1b9c58db 100644
--- a/src/predictor/predictor.cc
+++ b/src/predictor/predictor.cc
@@ -9,7 +9,7 @@
 #include <string>  // for string, to_string

 #include "../gbm/gbtree_model.h"         // for GBTreeModel
-#include "xgboost/base.h"                // for bst_float, Args, bst_group_t, bst_row_t
+#include "xgboost/base.h"                // for bst_float, Args, bst_group_t, bst_idx_t
 #include "xgboost/context.h"             // for Context
 #include "xgboost/data.h"                // for MetaInfo
"xgboost/host_device_vector.h" // for HostDeviceVector diff --git a/tests/cpp/common/test_hist_util.cu b/tests/cpp/common/test_hist_util.cu index 624c5b35d591..73af7115c385 100644 --- a/tests/cpp/common/test_hist_util.cu +++ b/tests/cpp/common/test_hist_util.cu @@ -214,7 +214,7 @@ TEST(HistUtil, RemoveDuplicatedCategories) { dh::device_vector weight(n_samples * n_features, 0); dh::Iota(dh::ToSpan(weight), ctx.CUDACtx()->Stream()); - dh::caching_device_vector columns_ptr(4); + dh::caching_device_vector columns_ptr(4); for (std::size_t i = 0; i < columns_ptr.size(); ++i) { columns_ptr[i] = i * n_samples; } @@ -795,7 +795,7 @@ TEST(HistUtil, AdapterSketchFromWeights) { namespace { class DeviceSketchWithHessianTest - : public ::testing::TestWithParam> { + : public ::testing::TestWithParam> { bst_feature_t n_features_ = 5; bst_group_t n_groups_{3}; @@ -910,9 +910,9 @@ class DeviceSketchWithHessianTest }; auto MakeParamsForTest() { - std::vector sizes = {1, 2, 256, 512, 1000, 1500}; + std::vector sizes = {1, 2, 256, 512, 1000, 1500}; std::vector bin_sizes = {2, 16, 256, 512}; - std::vector> configs; + std::vector> configs; for (auto n_samples : sizes) { for (auto n_bins : bin_sizes) { configs.emplace_back(true, n_samples, n_bins); diff --git a/tests/cpp/common/test_quantile.cu b/tests/cpp/common/test_quantile.cu index 26bd05524ded..070c705b55a6 100644 --- a/tests/cpp/common/test_quantile.cu +++ b/tests/cpp/common/test_quantile.cu @@ -27,7 +27,7 @@ TEST(GPUQuantile, Basic) { HostDeviceVector ft; SketchContainer sketch(ft, kBins, kCols, kRows, FstCU()); dh::caching_device_vector entries; - dh::device_vector cuts_ptr(kCols+1); + dh::device_vector cuts_ptr(kCols+1); thrust::fill(cuts_ptr.begin(), cuts_ptr.end(), 0); // Push empty sketch.Push(dh::ToSpan(entries), dh::ToSpan(cuts_ptr), dh::ToSpan(cuts_ptr), 0); @@ -87,11 +87,11 @@ TEST(GPUQuantile, Unique) { // if with_error is true, the test tolerates floating point error void TestQuantileElemRank(DeviceOrd device, Span in, - Span d_columns_ptr, bool with_error = false) { + Span d_columns_ptr, bool with_error = false) { dh::safe_cuda(cudaSetDevice(device.ordinal)); std::vector h_in(in.size()); dh::CopyDeviceSpanToVector(&h_in, in); - std::vector h_columns_ptr(d_columns_ptr.size()); + std::vector h_columns_ptr(d_columns_ptr.size()); dh::CopyDeviceSpanToVector(&h_columns_ptr, d_columns_ptr); for (size_t i = 1; i < d_columns_ptr.size(); ++i) { @@ -164,7 +164,7 @@ TEST(GPUQuantile, MergeEmpty) { std::vector entries_before(sketch_0.Data().size()); dh::CopyDeviceSpanToVector(&entries_before, sketch_0.Data()); - std::vector ptrs_before(sketch_0.ColumnsPtr().size()); + std::vector ptrs_before(sketch_0.ColumnsPtr().size()); dh::CopyDeviceSpanToVector(&ptrs_before, sketch_0.ColumnsPtr()); thrust::device_vector columns_ptr(kCols + 1); // Merge an empty sketch @@ -172,7 +172,7 @@ TEST(GPUQuantile, MergeEmpty) { std::vector entries_after(sketch_0.Data().size()); dh::CopyDeviceSpanToVector(&entries_after, sketch_0.Data()); - std::vector ptrs_after(sketch_0.ColumnsPtr().size()); + std::vector ptrs_after(sketch_0.ColumnsPtr().size()); dh::CopyDeviceSpanToVector(&ptrs_after, sketch_0.ColumnsPtr()); CHECK_EQ(entries_before.size(), entries_after.size()); @@ -222,7 +222,7 @@ TEST(GPUQuantile, MergeBasic) { } auto columns_ptr = sketch_0.ColumnsPtr(); - std::vector h_columns_ptr(columns_ptr.size()); + std::vector h_columns_ptr(columns_ptr.size()); dh::CopyDeviceSpanToVector(&h_columns_ptr, columns_ptr); ASSERT_EQ(h_columns_ptr.back(), sketch_1.Data().size() + 
   ASSERT_EQ(h_columns_ptr.back(), sketch_1.Data().size() + size_before_merge);
@@ -278,7 +278,7 @@ void TestMergeDuplicated(int32_t n_bins, size_t cols, size_t rows, float frac) {
   TestQuantileElemRank(FstCU(), sketch_0.Data(), sketch_0.ColumnsPtr());

   auto columns_ptr = sketch_0.ColumnsPtr();
-  std::vector<bst_row_t> h_columns_ptr(columns_ptr.size());
+  std::vector<bst_idx_t> h_columns_ptr(columns_ptr.size());
   dh::CopyDeviceSpanToVector(&h_columns_ptr, columns_ptr);
   ASSERT_EQ(h_columns_ptr.back(), sketch_1.Data().size() + size_before_merge);
diff --git a/tests/cpp/common/test_span.cu b/tests/cpp/common/test_span.cu
index 85c952340659..9c2bdc65cd34 100644
--- a/tests/cpp/common/test_span.cu
+++ b/tests/cpp/common/test_span.cu
@@ -1,14 +1,15 @@
-/*!
- * Copyright 2018 XGBoost contributors
+/**
+ * Copyright 2018-2024, XGBoost contributors
  */
 #include 
-
-#include 
 #include 
 #include 
+#include 
+#include 
+
+#include <numeric>  // for iota

 #include "../../../src/common/device_helpers.cuh"
-#include 
 #include "test_span.h"

 namespace xgboost {
diff --git a/tests/cpp/data/test_device_adapter.cu b/tests/cpp/data/test_device_adapter.cu
index 2190dbe5bceb..61cc9463c228 100644
--- a/tests/cpp/data/test_device_adapter.cu
+++ b/tests/cpp/data/test_device_adapter.cu
@@ -62,7 +62,7 @@ TEST(DeviceAdapter, GetRowCounts) {
                    .Device(ctx.Device())
                    .GenerateArrayInterface(&storage);
     auto adapter = CupyAdapter{str_arr};
-    HostDeviceVector<bst_row_t> offset(adapter.NumRows() + 1, 0);
+    HostDeviceVector<bst_idx_t> offset(adapter.NumRows() + 1, 0);
     offset.SetDevice(ctx.Device());
     auto rstride = GetRowCounts(adapter.Value(), offset.DeviceSpan(), ctx.Device(),
                                 std::numeric_limits::quiet_NaN());
diff --git a/tests/cpp/plugin/test_sycl_predictor.cc b/tests/cpp/plugin/test_sycl_predictor.cc
index d5b3a5e5cd9a..7bd788a3b071 100755
--- a/tests/cpp/plugin/test_sycl_predictor.cc
+++ b/tests/cpp/plugin/test_sycl_predictor.cc
@@ -43,7 +43,7 @@ TEST(SyclPredictor, ExternalMemory) {
 }

 TEST(SyclPredictor, InplacePredict) {
-  bst_row_t constexpr kRows{128};
+  bst_idx_t constexpr kRows{128};
   bst_feature_t constexpr kCols{64};
   Context ctx;
   auto gen = RandomDataGenerator{kRows, kCols, 0.5}.Device(ctx.Device());
@@ -106,4 +106,4 @@ TEST(SyclPredictor, Multi) {
   TestVectorLeafPrediction(&ctx);
 }

-}  // namespace xgboost
\ No newline at end of file
+}  // namespace xgboost
diff --git a/tests/cpp/tree/gpu_hist/test_evaluate_splits.cu b/tests/cpp/tree/gpu_hist/test_evaluate_splits.cu
index 862bc6bfcca9..f4accfc8a7b4 100644
--- a/tests/cpp/tree/gpu_hist/test_evaluate_splits.cu
+++ b/tests/cpp/tree/gpu_hist/test_evaluate_splits.cu
@@ -363,7 +363,7 @@ TEST(GpuHist, EvaluateSingleSplitMissing) {
   GPUTrainingParam param{tparam};

   thrust::device_vector feature_set = std::vector{0};
-  thrust::device_vector<bst_row_t> feature_segments = std::vector<bst_row_t>{0, 2};
+  thrust::device_vector<bst_idx_t> feature_segments = std::vector<bst_idx_t>{0, 2};
   thrust::device_vector feature_values = std::vector{1.0, 2.0};
   thrust::device_vector feature_min_values = std::vector{0.0};
   auto feature_histogram = ConvertToInteger(&ctx, {{-0.5, 0.5}, {0.5, 0.5}});
@@ -412,7 +412,7 @@ TEST(GpuHist, EvaluateSingleSplitFeatureSampling) {
   GPUTrainingParam param{tparam};

   thrust::device_vector feature_set = std::vector{1};
-  thrust::device_vector<bst_row_t> feature_segments = std::vector<bst_row_t>{0, 2, 4};
+  thrust::device_vector<bst_idx_t> feature_segments = std::vector<bst_idx_t>{0, 2, 4};
   thrust::device_vector feature_values = std::vector{1.0, 2.0, 11.0, 12.0};
   thrust::device_vector feature_min_values = std::vector{0.0, 10.0};
   auto feature_histogram =
@@ -446,7 +446,7 @@ TEST(GpuHist, EvaluateSingleSplitBreakTies) {
   GPUTrainingParam param{tparam};

   thrust::device_vector feature_set = std::vector{0, 1};
-  thrust::device_vector<bst_row_t> feature_segments = std::vector<bst_row_t>{0, 2, 4};
+  thrust::device_vector<bst_idx_t> feature_segments = std::vector<bst_idx_t>{0, 2, 4};
   thrust::device_vector feature_values = std::vector{1.0, 2.0, 11.0, 12.0};
   thrust::device_vector feature_min_values = std::vector{0.0, 10.0};
   auto feature_histogram =
@@ -478,7 +478,7 @@ TEST(GpuHist, EvaluateSplits) {
   GPUTrainingParam param{tparam};

   thrust::device_vector feature_set = std::vector{0, 1};
-  thrust::device_vector<bst_row_t> feature_segments = std::vector<bst_row_t>{0, 2, 4};
+  thrust::device_vector<bst_idx_t> feature_segments = std::vector<bst_idx_t>{0, 2, 4};
   thrust::device_vector feature_values = std::vector{1.0, 2.0, 11.0, 12.0};
   thrust::device_vector feature_min_values = std::vector{0.0, 0.0};
   auto feature_histogram_left =