Skip to content

Commit

Permalink
Fix.
Browse files Browse the repository at this point in the history
  • Loading branch information
trivialfis committed Mar 13, 2024
1 parent 3039f96 commit 6c0e235
Show file tree
Hide file tree
Showing 18 changed files with 48 additions and 50 deletions.
2 changes: 2 additions & 0 deletions src/collective/device_communicator_adapter.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
*/
#pragma once

#include <numeric> // for accumulate

#include "communicator.h"
#include "device_communicator.cuh"

Expand Down
3 changes: 0 additions & 3 deletions src/common/device_helpers.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -19,19 +19,16 @@
#include <thrust/unique.h>

#include <algorithm>
#include <chrono>
#include <cstddef> // for size_t
#include <cub/cub.cuh>
#include <cub/util_allocator.cuh>
#include <numeric>
#include <sstream>
#include <string>
#include <tuple>
#include <vector>

#include "../collective/communicator-inl.h"
#include "common.h"
#include "xgboost/global_config.h"
#include "xgboost/host_device_vector.h"
#include "xgboost/logging.h"
#include "xgboost/span.h"
Expand Down
4 changes: 1 addition & 3 deletions src/common/hist_util.cu
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,6 @@
#include <xgboost/logging.h>

#include <cstddef> // for size_t
#include <memory>
#include <mutex>
#include <utility>
#include <vector>

Expand Down Expand Up @@ -123,7 +121,7 @@ void SortByWeight(dh::device_vector<float>* weights, dh::device_vector<Entry>* s
[=] __device__(const Entry& a, const Entry& b) { return a.index == b.index; });
}

void RemoveDuplicatedCategories(DeviceOrd device, MetaInfo const& info, Span<bst_row_t> d_cuts_ptr,
void RemoveDuplicatedCategories(DeviceOrd device, MetaInfo const& info, Span<bst_idx_t> d_cuts_ptr,
dh::device_vector<Entry>* p_sorted_entries,
dh::device_vector<float>* p_sorted_weights,
dh::caching_device_vector<size_t>* p_column_sizes_scan) {
Expand Down
2 changes: 1 addition & 1 deletion src/common/hist_util.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -240,7 +240,7 @@ void MakeEntriesFromAdapter(AdapterBatch const& batch, BatchIter batch_iter, Ran
void SortByWeight(dh::device_vector<float>* weights,
dh::device_vector<Entry>* sorted_entries);

void RemoveDuplicatedCategories(DeviceOrd device, MetaInfo const& info, Span<bst_row_t> d_cuts_ptr,
void RemoveDuplicatedCategories(DeviceOrd device, MetaInfo const& info, Span<bst_idx_t> d_cuts_ptr,
dh::device_vector<Entry>* p_sorted_entries,
dh::device_vector<float>* p_sorted_weights,
dh::caching_device_vector<size_t>* p_column_sizes_scan);
Expand Down
2 changes: 1 addition & 1 deletion src/common/host_device_vector.cc
Original file line number Diff line number Diff line change
Expand Up @@ -178,7 +178,7 @@ template class HostDeviceVector<uint8_t>;
template class HostDeviceVector<int8_t>;
template class HostDeviceVector<FeatureType>;
template class HostDeviceVector<Entry>;
template class HostDeviceVector<uint64_t>; // bst_row_t
template class HostDeviceVector<bst_idx_t>;
template class HostDeviceVector<uint32_t>; // bst_feature_t

#if defined(__APPLE__) || defined(__EMSCRIPTEN__)
Expand Down
2 changes: 1 addition & 1 deletion src/common/host_device_vector.cu
Original file line number Diff line number Diff line change
Expand Up @@ -412,7 +412,7 @@ template class HostDeviceVector<uint8_t>;
template class HostDeviceVector<int8_t>;
template class HostDeviceVector<FeatureType>;
template class HostDeviceVector<Entry>;
template class HostDeviceVector<uint64_t>; // bst_row_t
template class HostDeviceVector<bst_idx_t>;
template class HostDeviceVector<uint32_t>; // bst_feature_t
template class HostDeviceVector<RegTree::Node>;
template class HostDeviceVector<RegTree::CategoricalSplitMatrix::Segment>;
Expand Down
10 changes: 5 additions & 5 deletions src/common/quantile.cu
Original file line number Diff line number Diff line change
Expand Up @@ -114,9 +114,9 @@ void CopyTo(Span<T> out, Span<U> src) {

// Compute the merge path.
common::Span<thrust::tuple<uint64_t, uint64_t>> MergePath(
Span<SketchEntry const> const &d_x, Span<bst_row_t const> const &x_ptr,
Span<SketchEntry const> const &d_y, Span<bst_row_t const> const &y_ptr,
Span<SketchEntry> out, Span<bst_row_t> out_ptr) {
Span<SketchEntry const> const &d_x, Span<bst_idx_t const> const &x_ptr,
Span<SketchEntry const> const &d_y, Span<bst_idx_t const> const &y_ptr,
Span<SketchEntry> out, Span<bst_idx_t> out_ptr) {
auto x_merge_key_it = thrust::make_zip_iterator(thrust::make_tuple(
dh::MakeTransformIterator<bst_idx_t>(
thrust::make_counting_iterator(0ul),
Expand Down Expand Up @@ -206,8 +206,8 @@ common::Span<thrust::tuple<uint64_t, uint64_t>> MergePath(
// run it in 2 passes to obtain the merge path and then customize the standard merge
// algorithm.
void MergeImpl(DeviceOrd device, Span<SketchEntry const> const &d_x,
Span<bst_row_t const> const &x_ptr, Span<SketchEntry const> const &d_y,
Span<bst_row_t const> const &y_ptr, Span<SketchEntry> out, Span<bst_row_t> out_ptr) {
Span<bst_idx_t const> const &x_ptr, Span<SketchEntry const> const &d_y,
Span<bst_idx_t const> const &y_ptr, Span<SketchEntry> out, Span<bst_idx_t> out_ptr) {
dh::safe_cuda(cudaSetDevice(device.ordinal));
CHECK_EQ(d_x.size() + d_y.size(), out.size());
CHECK_EQ(x_ptr.size(), out_ptr.size());
Expand Down
2 changes: 1 addition & 1 deletion src/data/data.cc
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@
#include "simple_dmatrix.h" // for SimpleDMatrix
#include "sparse_page_writer.h" // for SparsePageFormatReg
#include "validation.h" // for LabelsCheck, WeightsCheck, ValidateQueryGroup
#include "xgboost/base.h" // for bst_group_t, bst_row_t, bst_float, bst_ulong
#include "xgboost/base.h" // for bst_group_t, bst_idx_t, bst_float, bst_ulong
#include "xgboost/context.h" // for Context
#include "xgboost/host_device_vector.h" // for HostDeviceVector
#include "xgboost/learner.h" // for HostDeviceVector
Expand Down
6 changes: 3 additions & 3 deletions src/data/device_adapter.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -208,8 +208,8 @@ class CupyAdapter : public detail::SingleBatchDataIter<CupyAdapterBatch> {

// Returns maximum row length
template <typename AdapterBatchT>
std::size_t GetRowCounts(const AdapterBatchT batch, common::Span<bst_row_t> offset, DeviceOrd device,
float missing) {
bst_idx_t GetRowCounts(const AdapterBatchT batch, common::Span<bst_idx_t> offset, DeviceOrd device,
float missing) {
dh::safe_cuda(cudaSetDevice(device.ordinal));
IsValidFunctor is_valid(missing);
dh::safe_cuda(cudaMemsetAsync(offset.data(), '\0', offset.size_bytes()));
Expand Down Expand Up @@ -248,7 +248,7 @@ std::size_t GetRowCounts(const AdapterBatchT batch, common::Span<bst_row_t> offs
bst_idx_t row_stride =
dh::Reduce(thrust::cuda::par(alloc), thrust::device_pointer_cast(offset.data()),
thrust::device_pointer_cast(offset.data()) + offset.size(),
static_cast<bst_idx_t>(0), thrust::maximum<bst_row_t>());
static_cast<bst_idx_t>(0), thrust::maximum<bst_idx_t>());
return row_stride;
}

Expand Down
2 changes: 1 addition & 1 deletion src/data/simple_dmatrix.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ void CopyDataToDMatrix(AdapterBatchT batch, common::Span<Entry> data,
}

template <typename AdapterBatchT>
void CountRowOffsets(const AdapterBatchT& batch, common::Span<bst_row_t> offset, DeviceOrd device,
void CountRowOffsets(const AdapterBatchT& batch, common::Span<bst_idx_t> offset, DeviceOrd device,
float missing) {
dh::safe_cuda(cudaSetDevice(device.ordinal));
IsValidFunctor is_valid(missing);
Expand Down
14 changes: 7 additions & 7 deletions src/predictor/gpu_predictor.cu
Original file line number Diff line number Diff line change
Expand Up @@ -67,12 +67,12 @@ struct TreeView {

struct SparsePageView {
common::Span<const Entry> d_data;
common::Span<const bst_row_t> d_row_ptr;
common::Span<const bst_idx_t> d_row_ptr;
bst_feature_t num_features;

SparsePageView() = default;
XGBOOST_DEVICE SparsePageView(common::Span<const Entry> data,
common::Span<const bst_row_t> row_ptr,
common::Span<const bst_idx_t> row_ptr,
bst_feature_t num_features)
: d_data{data}, d_row_ptr{row_ptr}, num_features(num_features) {}
[[nodiscard]] __device__ float GetElement(size_t ridx, size_t fidx) const {
Expand Down Expand Up @@ -664,7 +664,7 @@ __global__ void MaskBitVectorKernel(
}
}

__device__ bst_node_t GetLeafIndexByBitVector(bst_row_t ridx, TreeView const& tree,
__device__ bst_node_t GetLeafIndexByBitVector(bst_idx_t ridx, TreeView const& tree,
BitVector const& decision_bits,
BitVector const& missing_bits, std::size_t num_nodes,
std::size_t tree_offset) {
Expand All @@ -682,7 +682,7 @@ __device__ bst_node_t GetLeafIndexByBitVector(bst_row_t ridx, TreeView const& tr
return nidx;
}

__device__ float GetLeafWeightByBitVector(bst_row_t ridx, TreeView const& tree,
__device__ float GetLeafWeightByBitVector(bst_idx_t ridx, TreeView const& tree,
BitVector const& decision_bits,
BitVector const& missing_bits, std::size_t num_nodes,
std::size_t tree_offset) {
Expand Down Expand Up @@ -1171,7 +1171,7 @@ class GPUPredictor : public xgboost::Predictor {
auto max_shared_memory_bytes = ConfigureDevice(ctx_->Device());

const MetaInfo& info = p_fmat->Info();
bst_row_t num_rows = info.num_row_;
bst_idx_t num_rows = info.num_row_;
if (tree_end == 0 || tree_end > model.trees.size()) {
tree_end = static_cast<uint32_t>(model.trees.size());
}
Expand All @@ -1196,7 +1196,7 @@ class GPUPredictor : public xgboost::Predictor {
for (auto const& batch : p_fmat->GetBatches<SparsePage>()) {
batch.data.SetDevice(ctx_->Device());
batch.offset.SetDevice(ctx_->Device());
bst_row_t batch_offset = 0;
bst_idx_t batch_offset = 0;
SparsePageView data{batch.data.DeviceSpan(), batch.offset.DeviceSpan(),
model.learner_model_param->num_feature};
size_t num_rows = batch.Size();
Expand All @@ -1219,7 +1219,7 @@ class GPUPredictor : public xgboost::Predictor {
}
} else {
for (auto const& batch : p_fmat->GetBatches<EllpackPage>(ctx_, BatchParam{})) {
bst_row_t batch_offset = 0;
bst_idx_t batch_offset = 0;
EllpackDeviceAccessor data{batch.Impl()->GetDeviceAccessor(ctx_->Device())};
size_t num_rows = batch.Size();
auto grid =
Expand Down
2 changes: 1 addition & 1 deletion src/predictor/predictor.cc
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
#include <string> // for string, to_string

#include "../gbm/gbtree_model.h" // for GBTreeModel
#include "xgboost/base.h" // for bst_float, Args, bst_group_t, bst_row_t
#include "xgboost/base.h" // for bst_float, Args, bst_group_t, bst_idx_t
#include "xgboost/context.h" // for Context
#include "xgboost/data.h" // for MetaInfo
#include "xgboost/host_device_vector.h" // for HostDeviceVector
Expand Down
8 changes: 4 additions & 4 deletions tests/cpp/common/test_hist_util.cu
Original file line number Diff line number Diff line change
Expand Up @@ -214,7 +214,7 @@ TEST(HistUtil, RemoveDuplicatedCategories) {
dh::device_vector<float> weight(n_samples * n_features, 0);
dh::Iota(dh::ToSpan(weight), ctx.CUDACtx()->Stream());

dh::caching_device_vector<bst_row_t> columns_ptr(4);
dh::caching_device_vector<bst_idx_t> columns_ptr(4);
for (std::size_t i = 0; i < columns_ptr.size(); ++i) {
columns_ptr[i] = i * n_samples;
}
Expand Down Expand Up @@ -795,7 +795,7 @@ TEST(HistUtil, AdapterSketchFromWeights) {

namespace {
class DeviceSketchWithHessianTest
: public ::testing::TestWithParam<std::tuple<bool, bst_row_t, bst_bin_t>> {
: public ::testing::TestWithParam<std::tuple<bool, bst_idx_t, bst_bin_t>> {
bst_feature_t n_features_ = 5;
bst_group_t n_groups_{3};

Expand Down Expand Up @@ -910,9 +910,9 @@ class DeviceSketchWithHessianTest
};

auto MakeParamsForTest() {
std::vector<bst_row_t> sizes = {1, 2, 256, 512, 1000, 1500};
std::vector<bst_idx_t> sizes = {1, 2, 256, 512, 1000, 1500};
std::vector<bst_bin_t> bin_sizes = {2, 16, 256, 512};
std::vector<std::tuple<bool, bst_row_t, bst_bin_t>> configs;
std::vector<std::tuple<bool, bst_idx_t, bst_bin_t>> configs;
for (auto n_samples : sizes) {
for (auto n_bins : bin_sizes) {
configs.emplace_back(true, n_samples, n_bins);
Expand Down
14 changes: 7 additions & 7 deletions tests/cpp/common/test_quantile.cu
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ TEST(GPUQuantile, Basic) {
HostDeviceVector<FeatureType> ft;
SketchContainer sketch(ft, kBins, kCols, kRows, FstCU());
dh::caching_device_vector<Entry> entries;
dh::device_vector<bst_row_t> cuts_ptr(kCols+1);
dh::device_vector<bst_idx_t> cuts_ptr(kCols+1);
thrust::fill(cuts_ptr.begin(), cuts_ptr.end(), 0);
// Push empty
sketch.Push(dh::ToSpan(entries), dh::ToSpan(cuts_ptr), dh::ToSpan(cuts_ptr), 0);
Expand Down Expand Up @@ -87,11 +87,11 @@ TEST(GPUQuantile, Unique) {

// if with_error is true, the test tolerates floating point error
void TestQuantileElemRank(DeviceOrd device, Span<SketchEntry const> in,
Span<bst_row_t const> d_columns_ptr, bool with_error = false) {
Span<bst_idx_t const> d_columns_ptr, bool with_error = false) {
dh::safe_cuda(cudaSetDevice(device.ordinal));
std::vector<SketchEntry> h_in(in.size());
dh::CopyDeviceSpanToVector(&h_in, in);
std::vector<bst_row_t> h_columns_ptr(d_columns_ptr.size());
std::vector<bst_idx_t> h_columns_ptr(d_columns_ptr.size());
dh::CopyDeviceSpanToVector(&h_columns_ptr, d_columns_ptr);

for (size_t i = 1; i < d_columns_ptr.size(); ++i) {
Expand Down Expand Up @@ -164,15 +164,15 @@ TEST(GPUQuantile, MergeEmpty) {

std::vector<SketchEntry> entries_before(sketch_0.Data().size());
dh::CopyDeviceSpanToVector(&entries_before, sketch_0.Data());
std::vector<bst_row_t> ptrs_before(sketch_0.ColumnsPtr().size());
std::vector<bst_idx_t> ptrs_before(sketch_0.ColumnsPtr().size());
dh::CopyDeviceSpanToVector(&ptrs_before, sketch_0.ColumnsPtr());
thrust::device_vector<size_t> columns_ptr(kCols + 1);
// Merge an empty sketch
sketch_0.Merge(dh::ToSpan(columns_ptr), Span<SketchEntry>{});

std::vector<SketchEntry> entries_after(sketch_0.Data().size());
dh::CopyDeviceSpanToVector(&entries_after, sketch_0.Data());
std::vector<bst_row_t> ptrs_after(sketch_0.ColumnsPtr().size());
std::vector<bst_idx_t> ptrs_after(sketch_0.ColumnsPtr().size());
dh::CopyDeviceSpanToVector(&ptrs_after, sketch_0.ColumnsPtr());

CHECK_EQ(entries_before.size(), entries_after.size());
Expand Down Expand Up @@ -222,7 +222,7 @@ TEST(GPUQuantile, MergeBasic) {
}

auto columns_ptr = sketch_0.ColumnsPtr();
std::vector<bst_row_t> h_columns_ptr(columns_ptr.size());
std::vector<bst_idx_t> h_columns_ptr(columns_ptr.size());
dh::CopyDeviceSpanToVector(&h_columns_ptr, columns_ptr);
ASSERT_EQ(h_columns_ptr.back(), sketch_1.Data().size() + size_before_merge);

Expand Down Expand Up @@ -278,7 +278,7 @@ void TestMergeDuplicated(int32_t n_bins, size_t cols, size_t rows, float frac) {
TestQuantileElemRank(FstCU(), sketch_0.Data(), sketch_0.ColumnsPtr());

auto columns_ptr = sketch_0.ColumnsPtr();
std::vector<bst_row_t> h_columns_ptr(columns_ptr.size());
std::vector<bst_idx_t> h_columns_ptr(columns_ptr.size());
dh::CopyDeviceSpanToVector(&h_columns_ptr, columns_ptr);
ASSERT_EQ(h_columns_ptr.back(), sketch_1.Data().size() + size_before_merge);

Expand Down
11 changes: 6 additions & 5 deletions tests/cpp/common/test_span.cu
Original file line number Diff line number Diff line change
@@ -1,14 +1,15 @@
/*!
* Copyright 2018 XGBoost contributors
/**
* Copyright 2018-2024, XGBoost contributors
*/
#include <gtest/gtest.h>

#include <thrust/host_vector.h>
#include <thrust/device_vector.h>
#include <thrust/execution_policy.h>
#include <thrust/host_vector.h>
#include <xgboost/span.h>

#include <numeric> // for iota

#include "../../../src/common/device_helpers.cuh"
#include <xgboost/span.h>
#include "test_span.h"

namespace xgboost {
Expand Down
2 changes: 1 addition & 1 deletion tests/cpp/data/test_device_adapter.cu
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ TEST(DeviceAdapter, GetRowCounts) {
.Device(ctx.Device())
.GenerateArrayInterface(&storage);
auto adapter = CupyAdapter{str_arr};
HostDeviceVector<bst_row_t> offset(adapter.NumRows() + 1, 0);
HostDeviceVector<bst_idx_t> offset(adapter.NumRows() + 1, 0);
offset.SetDevice(ctx.Device());
auto rstride = GetRowCounts(adapter.Value(), offset.DeviceSpan(), ctx.Device(),
std::numeric_limits<float>::quiet_NaN());
Expand Down
4 changes: 2 additions & 2 deletions tests/cpp/plugin/test_sycl_predictor.cc
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ TEST(SyclPredictor, ExternalMemory) {
}

TEST(SyclPredictor, InplacePredict) {
bst_row_t constexpr kRows{128};
bst_idx_t constexpr kRows{128};
bst_feature_t constexpr kCols{64};
Context ctx;
auto gen = RandomDataGenerator{kRows, kCols, 0.5}.Device(ctx.Device());
Expand Down Expand Up @@ -106,4 +106,4 @@ TEST(SyclPredictor, Multi) {
TestVectorLeafPrediction(&ctx);
}

} // namespace xgboost
} // namespace xgboost
8 changes: 4 additions & 4 deletions tests/cpp/tree/gpu_hist/test_evaluate_splits.cu
Original file line number Diff line number Diff line change
Expand Up @@ -363,7 +363,7 @@ TEST(GpuHist, EvaluateSingleSplitMissing) {
GPUTrainingParam param{tparam};

thrust::device_vector<bst_feature_t> feature_set = std::vector<bst_feature_t>{0};
thrust::device_vector<uint32_t> feature_segments = std::vector<bst_row_t>{0, 2};
thrust::device_vector<uint32_t> feature_segments = std::vector<bst_idx_t>{0, 2};
thrust::device_vector<float> feature_values = std::vector<float>{1.0, 2.0};
thrust::device_vector<float> feature_min_values = std::vector<float>{0.0};
auto feature_histogram = ConvertToInteger(&ctx, {{-0.5, 0.5}, {0.5, 0.5}});
Expand Down Expand Up @@ -412,7 +412,7 @@ TEST(GpuHist, EvaluateSingleSplitFeatureSampling) {
GPUTrainingParam param{tparam};

thrust::device_vector<bst_feature_t> feature_set = std::vector<bst_feature_t>{1};
thrust::device_vector<uint32_t> feature_segments = std::vector<bst_row_t>{0, 2, 4};
thrust::device_vector<uint32_t> feature_segments = std::vector<bst_idx_t>{0, 2, 4};
thrust::device_vector<float> feature_values = std::vector<float>{1.0, 2.0, 11.0, 12.0};
thrust::device_vector<float> feature_min_values = std::vector<float>{0.0, 10.0};
auto feature_histogram =
Expand Down Expand Up @@ -446,7 +446,7 @@ TEST(GpuHist, EvaluateSingleSplitBreakTies) {
GPUTrainingParam param{tparam};

thrust::device_vector<bst_feature_t> feature_set = std::vector<bst_feature_t>{0, 1};
thrust::device_vector<uint32_t> feature_segments = std::vector<bst_row_t>{0, 2, 4};
thrust::device_vector<uint32_t> feature_segments = std::vector<bst_idx_t>{0, 2, 4};
thrust::device_vector<float> feature_values = std::vector<float>{1.0, 2.0, 11.0, 12.0};
thrust::device_vector<float> feature_min_values = std::vector<float>{0.0, 10.0};
auto feature_histogram =
Expand Down Expand Up @@ -478,7 +478,7 @@ TEST(GpuHist, EvaluateSplits) {
GPUTrainingParam param{tparam};

thrust::device_vector<bst_feature_t> feature_set = std::vector<bst_feature_t>{0, 1};
thrust::device_vector<uint32_t> feature_segments = std::vector<bst_row_t>{0, 2, 4};
thrust::device_vector<uint32_t> feature_segments = std::vector<bst_idx_t>{0, 2, 4};
thrust::device_vector<float> feature_values = std::vector<float>{1.0, 2.0, 11.0, 12.0};
thrust::device_vector<float> feature_min_values = std::vector<float>{0.0, 0.0};
auto feature_histogram_left =
Expand Down

0 comments on commit 6c0e235

Please sign in to comment.