From 0953fea9f03e3d2a8ecacbc05f0d1c65b1f2b844 Mon Sep 17 00:00:00 2001
From: Jiaming Yuan <jm.yuan@outlook.com>
Date: Tue, 12 Mar 2024 21:50:34 +0800
Subject: [PATCH 1/5] Use `std::uint64_t` for row index.

- Use std::uint64_t instead of size_t to avoid an implementation-defined type.

  It's difficult to define explicit template instantiations for an implementation-defined type.

- Small cleanup to the base header.
---
 include/xgboost/base.h | 48 ++++++++++++++++++------------------------
 1 file changed, 21 insertions(+), 27 deletions(-)
diff --git a/include/xgboost/base.h b/include/xgboost/base.h
index 1f94c9b2fd1d..c20d1625a06f 100644
--- a/include/xgboost/base.h
+++ b/include/xgboost/base.h
@@ -1,20 +1,18 @@
 /**
- * Copyright 2015-2023 by XGBoost Contributors
+ * Copyright 2015-2024, XGBoost Contributors
  * \file base.h
  * \brief Defines configuration macros and basic types for xgboost.
  */
 #ifndef XGBOOST_BASE_H_
 #define XGBOOST_BASE_H_
 
-#include <dmlc/base.h>
-#include <dmlc/omp.h>
+#include <dmlc/omp.h>  // for omp_uint, omp_ulong
 
-#include <cmath>
-#include <cstdint>
-#include <iostream>
-#include <string>
-#include <utility>
-#include <vector>
+#include <cstdint>  // for int32_t, uint64_t, int16_t
+#include <ostream>  // for ostream
+#include <string>   // for string
+#include <utility>  // for pair
+#include <vector>   // for vector
 
 /*!
  * \brief string flag for R library, to leave hooks when needed.
@@ -86,34 +84,31 @@
 
 #endif  // !defined(XGBOOST_MM_PREFETCH_PRESENT) && !defined()
 
-/*! \brief namespace of xgboost*/
 namespace xgboost {
-
 /*! \brief unsigned integer type used for feature index. */
-using bst_uint = uint32_t;  // NOLINT
+using bst_uint = std::uint32_t;  // NOLINT
 /*! \brief unsigned long integers */
-using bst_ulong = uint64_t;  // NOLINT
+using bst_ulong = std::uint64_t;  // NOLINT
 /*! \brief float type, used for storing statistics */
 using bst_float = float;  // NOLINT
 /*! \brief Categorical value type. */
-using bst_cat_t = int32_t;  // NOLINT
+using bst_cat_t = std::int32_t;  // NOLINT
 /*! \brief Type for data column (feature) index. */
-using bst_feature_t = uint32_t;  // NOLINT
-/*! \brief Type for histogram bin index. */
-using bst_bin_t = int32_t;  // NOLINT
-/*! \brief Type for data row index.
- *
- * Be careful `std::size_t' is implementation-defined.  Meaning that the binary
- * representation of DMatrix might not be portable across platform.  Booster model should
- * be portable as parameters are floating points.
+using bst_feature_t = std::uint32_t;  // NOLINT
+/**
+ * @brief Type for histogram bin index.  We sometimes use -1 to indicate invalid bin.
  */
-using bst_row_t = std::size_t;   // NOLINT
+using bst_bin_t = std::int32_t;  // NOLINT
+/**
+ * @brief Type for data row index (sample).
+ */
+using bst_row_t = std::uint64_t;  // NOLINT
 /*! \brief Type for tree node index. */
 using bst_node_t = std::int32_t;      // NOLINT
 /*! \brief Type for ranking group index. */
 using bst_group_t = std::uint32_t;  // NOLINT
 /**
- * \brief Type for indexing into output targets.
+ * @brief Type for indexing into output targets.
  */
 using bst_target_t = std::uint32_t;  // NOLINT
 /**
@@ -306,8 +301,7 @@ class GradientPairInt64 {
   XGBOOST_DEVICE bool operator==(const GradientPairInt64 &rhs) const {
     return grad_ == rhs.grad_ && hess_ == rhs.hess_;
   }
-  friend std::ostream &operator<<(std::ostream &os,
-                                  const GradientPairInt64 &g) {
+  friend std::ostream &operator<<(std::ostream &os, const GradientPairInt64 &g) {
     os << g.GetQuantisedGrad() << "/" << g.GetQuantisedHess();
     return os;
   }
@@ -323,7 +317,7 @@ using omp_ulong = dmlc::omp_ulong;  // NOLINT
 /*! \brief define unsigned int for openmp loop */
 using bst_omp_uint = dmlc::omp_uint;  // NOLINT
 /*! \brief Type used for representing version number in binary form.*/
-using XGBoostVersionT = int32_t;
+using XGBoostVersionT = std::int32_t;
 }  // namespace xgboost
 
 #endif  // XGBOOST_BASE_H_

From ce0c3596d523a823a48a5c2c83dfb675560aaf4f Mon Sep 17 00:00:00 2001
From: Jiaming Yuan <jm.yuan@outlook.com>
Date: Wed, 13 Mar 2024 17:56:35 +0800
Subject: [PATCH 2/5] Build on i386

---
 include/xgboost/base.h                      |  2 +-
 include/xgboost/data.h                      |  4 +-
 src/common/column_matrix.h                  |  6 +--
 src/common/hist_util.cc                     | 10 ++---
 src/common/hist_util.cu                     |  8 ++--
 src/common/hist_util.cuh                    |  4 +-
 src/common/quantile.cc                      | 28 +++++++-------
 src/common/quantile.cu                      |  4 +-
 src/common/quantile.cuh                     |  6 +--
 src/common/quantile.h                       | 26 ++++++-------
 src/data/adapter.h                          | 42 +++++++++------------
 src/data/data.cc                            |  6 +--
 src/data/device_adapter.cuh                 | 18 ++++-----
 src/data/gradient_index.cc                  |  2 +-
 src/data/gradient_index.h                   |  6 +--
 src/data/iterative_dmatrix.cc               |  2 +-
 src/data/simple_dmatrix.cc                  |  2 +-
 src/predictor/cpu_predictor.cc              |  6 +--
 src/predictor/gpu_predictor.cu              | 12 +++---
 src/predictor/predictor.cc                  |  2 +-
 src/tree/common_row_partitioner.h           |  6 +--
 src/tree/gpu_hist/gradient_based_sampler.cu |  2 +-
 src/tree/hist/sampler.h                     |  2 +-
 src/tree/updater_gpu_hist.cu                |  2 +-
 tests/cpp/c_api/test_c_api.cc               |  8 ++--
 tests/cpp/common/test_hist_util.cu          | 12 +++---
 tests/cpp/common/test_quantile.cc           |  4 +-
 tests/cpp/data/test_adapter.cc              |  4 +-
 tests/cpp/data/test_data.cc                 |  2 +-
 tests/cpp/data/test_metainfo.cc             |  2 +-
 tests/cpp/data/test_simple_dmatrix.cc       |  2 +-
 tests/cpp/gbm/test_gbtree.cc                |  2 +-
 tests/cpp/gbm/test_gbtree.cu                |  2 +-
 tests/cpp/helpers.cc                        | 14 +++----
 tests/cpp/helpers.h                         | 16 ++++----
 tests/cpp/histogram_helpers.h               |  2 +-
 tests/cpp/predictor/test_cpu_predictor.cc   |  4 +-
 tests/cpp/predictor/test_predictor.cc       |  4 +-
 tests/cpp/predictor/test_predictor.h        |  2 +-
 tests/cpp/test_helpers.cc                   |  2 +-
 tests/cpp/test_learner.cc                   |  2 +-
 tests/cpp/tree/hist/test_histogram.cc       |  4 +-
 tests/cpp/tree/test_gpu_hist.cu             |  4 +-
 tests/cpp/tree/test_quantile_hist.cc        |  2 +-
 tests/cpp/tree/test_refresh.cc              |  2 +-
 45 files changed, 148 insertions(+), 156 deletions(-)

diff --git a/include/xgboost/base.h b/include/xgboost/base.h
index c20d1625a06f..9abe72b87859 100644
--- a/include/xgboost/base.h
+++ b/include/xgboost/base.h
@@ -102,7 +102,7 @@ using bst_bin_t = std::int32_t;  // NOLINT
 /**
  * @brief Type for data row index (sample).
  */
-using bst_row_t = std::uint64_t;  // NOLINT
+using bst_idx_t = std::uint64_t;  // NOLINT
 /*! \brief Type for tree node index. */
 using bst_node_t = std::int32_t;      // NOLINT
 /*! \brief Type for ranking group index. */
diff --git a/include/xgboost/data.h b/include/xgboost/data.h
index 08d3d119a8ff..2bdf3713dbe1 100644
--- a/include/xgboost/data.h
+++ b/include/xgboost/data.h
@@ -315,7 +315,7 @@ struct BatchParam {
 struct HostSparsePageView {
   using Inst = common::Span<Entry const>;
 
-  common::Span<bst_row_t const> offset;
+  common::Span<bst_idx_t const> offset;
   common::Span<Entry const> data;
 
   Inst operator[](size_t i) const {
@@ -333,7 +333,7 @@ struct HostSparsePageView {
 class SparsePage {
  public:
   // Offset for each row.
-  HostDeviceVector<bst_row_t> offset;
+  HostDeviceVector<bst_idx_t> offset;
   /*! \brief the data of the segments */
   HostDeviceVector<Entry> data;
 
diff --git a/src/common/column_matrix.h b/src/common/column_matrix.h
index 440f3c0a87c8..843cee80fbc9 100644
--- a/src/common/column_matrix.h
+++ b/src/common/column_matrix.h
@@ -72,7 +72,7 @@ class SparseColumnIter : public Column<BinIdxT> {
 
  public:
   SparseColumnIter(common::Span<const BinIdxT> index, bst_bin_t least_bin_idx,
-                   common::Span<const size_t> row_ind, bst_row_t first_row_idx)
+                   common::Span<const size_t> row_ind, bst_idx_t first_row_idx)
       : Base{index, least_bin_idx}, row_ind_(row_ind) {
     // first_row_id is the first row in the leaf partition
     const size_t* row_data = RowIndices();
@@ -301,7 +301,7 @@ class ColumnMatrix {
   }
 
   template <typename BinIdxType>
-  auto SparseColumn(bst_feature_t fidx, bst_row_t first_row_idx) const {
+  auto SparseColumn(bst_feature_t fidx, bst_idx_t first_row_idx) const {
     const size_t feature_offset = feature_offsets_[fidx];  // to get right place for certain feature
     const size_t column_size = feature_offsets_[fidx + 1] - feature_offset;
     common::Span<const BinIdxType> bin_index = {
@@ -325,7 +325,7 @@ class ColumnMatrix {
   // all columns are dense column and has no missing value
   // FIXME(jiamingy): We don't need a column matrix if there's no missing value.
   template <typename RowBinIdxT>
-  void SetIndexNoMissing(bst_row_t base_rowid, RowBinIdxT const* row_index, const size_t n_samples,
+  void SetIndexNoMissing(bst_idx_t base_rowid, RowBinIdxT const* row_index, const size_t n_samples,
                          const size_t n_features, int32_t n_threads) {
     missing_.GrowTo(feature_offsets_[n_features], false);
 
diff --git a/src/common/hist_util.cc b/src/common/hist_util.cc
index f101247920a4..9b703a3fa13a 100644
--- a/src/common/hist_util.cc
+++ b/src/common/hist_util.cc
@@ -34,7 +34,7 @@ HistogramCuts SketchOnDMatrix(Context const *ctx, DMatrix *m, bst_bin_t max_bins
   HistogramCuts out;
   auto const &info = m->Info();
   auto n_threads = ctx->Threads();
-  std::vector<bst_row_t> reduced(info.num_col_, 0);
+  std::vector<bst_idx_t> reduced(info.num_col_, 0);
   for (auto const &page : m->GetBatches<SparsePage>()) {
     auto const &entries_per_column =
         CalcColumnSize(data::SparsePageAdapterBatch{page.GetView()}, info.num_col_, n_threads,
@@ -209,10 +209,10 @@ void RowsWiseBuildHistKernel(Span<GradientPair const> gpair,
     CHECK(offsets);
   }
 
-  auto get_row_ptr = [&](bst_row_t ridx) {
+  auto get_row_ptr = [&](bst_idx_t ridx) {
     return kFirstPage ? row_ptr[ridx] : row_ptr[ridx - base_rowid];
   };
-  auto get_rid = [&](bst_row_t ridx) { return kFirstPage ? ridx : (ridx - base_rowid); };
+  auto get_rid = [&](bst_idx_t ridx) { return kFirstPage ? ridx : (ridx - base_rowid); };
 
   const size_t n_features =
       get_row_ptr(row_indices.begin[0] + 1) - get_row_ptr(row_indices.begin[0]);
@@ -275,10 +275,10 @@ void ColsWiseBuildHistKernel(Span<GradientPair const> gpair,
   auto const &row_ptr = gmat.row_ptr.data();
   auto base_rowid = gmat.base_rowid;
   const uint32_t *offsets = gmat.index.Offset();
-  auto get_row_ptr = [&](bst_row_t ridx) {
+  auto get_row_ptr = [&](bst_idx_t ridx) {
     return kFirstPage ? row_ptr[ridx] : row_ptr[ridx - base_rowid];
   };
-  auto get_rid = [&](bst_row_t ridx) { return kFirstPage ? ridx : (ridx - base_rowid); };
+  auto get_rid = [&](bst_idx_t ridx) { return kFirstPage ? ridx : (ridx - base_rowid); };
 
   const size_t n_features = gmat.cut.Ptrs().size() - 1;
   const size_t n_columns = n_features;
diff --git a/src/common/hist_util.cu b/src/common/hist_util.cu
index fbe6356bf501..a2fda7017b8b 100644
--- a/src/common/hist_util.cu
+++ b/src/common/hist_util.cu
@@ -39,7 +39,7 @@ size_t RequiredSampleCutsPerColumn(int max_bins, size_t num_rows) {
   return std::min(num_cuts, num_rows);
 }
 
-size_t RequiredSampleCuts(bst_row_t num_rows, bst_feature_t num_columns,
+size_t RequiredSampleCuts(bst_idx_t num_rows, bst_feature_t num_columns,
                           size_t max_bins, size_t nnz) {
   auto per_column = RequiredSampleCutsPerColumn(max_bins, num_rows);
   auto if_dense = num_columns * per_column;
@@ -47,7 +47,7 @@ size_t RequiredSampleCuts(bst_row_t num_rows, bst_feature_t num_columns,
   return result;
 }
 
-size_t RequiredMemory(bst_row_t num_rows, bst_feature_t num_columns, size_t nnz,
+size_t RequiredMemory(bst_idx_t num_rows, bst_feature_t num_columns, size_t nnz,
                       size_t num_bins, bool with_weights) {
   size_t peak = 0;
   // 0. Allocate cut pointer in quantile container by increasing: n_columns + 1
@@ -85,7 +85,7 @@ size_t RequiredMemory(bst_row_t num_rows, bst_feature_t num_columns, size_t nnz,
   return peak;
 }
 
-size_t SketchBatchNumElements(size_t sketch_batch_num_elements, bst_row_t num_rows,
+size_t SketchBatchNumElements(size_t sketch_batch_num_elements, bst_idx_t num_rows,
                               bst_feature_t columns, size_t nnz, int device, size_t num_cuts,
                               bool has_weight) {
   auto constexpr kIntMax = static_cast<std::size_t>(std::numeric_limits<std::int32_t>::max());
@@ -210,7 +210,7 @@ void ProcessWeightedBatch(Context const* ctx, const SparsePage& page, MetaInfo c
     sorted_entries = dh::device_vector<Entry>(h_data.begin() + begin, h_data.begin() + end);
   }
 
-  bst_row_t base_rowid = page.base_rowid;
+  bst_idx_t base_rowid = page.base_rowid;
 
   dh::device_vector<float> entry_weight;
   auto cuctx = ctx->CUDACtx();
diff --git a/src/common/hist_util.cuh b/src/common/hist_util.cuh
index 3cd13030ef40..e899f2a1ce07 100644
--- a/src/common/hist_util.cuh
+++ b/src/common/hist_util.cuh
@@ -186,7 +186,7 @@ inline size_t constexpr BytesPerElement(bool has_weight) {
  *        directly if it's not 0.
  */
 size_t SketchBatchNumElements(size_t sketch_batch_num_elements,
-                              bst_row_t num_rows, bst_feature_t columns,
+                              bst_idx_t num_rows, bst_feature_t columns,
                               size_t nnz, int device,
                               size_t num_cuts, bool has_weight);
 
@@ -209,7 +209,7 @@ size_t RequiredSampleCutsPerColumn(int max_bins, size_t num_rows);
  *
  * \return The estimated bytes
  */
-size_t RequiredMemory(bst_row_t num_rows, bst_feature_t num_columns, size_t nnz,
+size_t RequiredMemory(bst_idx_t num_rows, bst_feature_t num_columns, size_t nnz,
                       size_t num_bins, bool with_weights);
 
 // Count the valid entries in each column and copy them out.
diff --git a/src/common/quantile.cc b/src/common/quantile.cc
index e521fae69b1d..4ae6ecd36362 100644
--- a/src/common/quantile.cc
+++ b/src/common/quantile.cc
@@ -14,7 +14,7 @@
 namespace xgboost::common {
 template <typename WQSketch>
 SketchContainerImpl<WQSketch>::SketchContainerImpl(Context const *ctx,
-                                                   std::vector<bst_row_t> columns_size,
+                                                   std::vector<bst_idx_t> columns_size,
                                                    int32_t max_bins,
                                                    Span<FeatureType const> feature_types,
                                                    bool use_group)
@@ -120,8 +120,8 @@ namespace {
 template <typename T>
 struct QuantileAllreduce {
   common::Span<T> global_values;
-  common::Span<size_t> worker_indptr;
-  common::Span<size_t> feature_indptr;
+  common::Span<bst_idx_t> worker_indptr;
+  common::Span<bst_idx_t> feature_indptr;
   size_t n_features{0};
   /**
    * \brief Get sketch values of the a feature from a worker.
@@ -147,7 +147,7 @@ template <typename WQSketch>
 void SketchContainerImpl<WQSketch>::GatherSketchInfo(
     Context const *ctx, MetaInfo const &info,
     std::vector<typename WQSketch::SummaryContainer> const &reduced,
-    std::vector<size_t> *p_worker_segments, std::vector<bst_row_t> *p_sketches_scan,
+    std::vector<bst_idx_t> *p_worker_segments, std::vector<bst_idx_t> *p_sketches_scan,
     std::vector<typename WQSketch::Entry> *p_global_sketches) {
   auto &worker_segments = *p_worker_segments;
   worker_segments.resize(1, 0);
@@ -156,7 +156,7 @@ void SketchContainerImpl<WQSketch>::GatherSketchInfo(
   auto n_columns = sketches_.size();
 
   // get the size of each feature.
-  std::vector<bst_row_t> sketch_size;
+  std::vector<bst_idx_t> sketch_size;
   for (size_t i = 0; i < reduced.size(); ++i) {
     if (IsCat(feature_types_, i)) {
       sketch_size.push_back(0);
@@ -165,7 +165,7 @@ void SketchContainerImpl<WQSketch>::GatherSketchInfo(
     }
   }
   // turn the size into CSC indptr
-  std::vector<bst_row_t> &sketches_scan = *p_sketches_scan;
+  std::vector<bst_idx_t> &sketches_scan = *p_sketches_scan;
   sketches_scan.resize((n_columns + 1) * world, 0);
   size_t beg_scan = rank * (n_columns + 1);  // starting storage for current worker.
   std::partial_sum(sketch_size.cbegin(), sketch_size.cend(), sketches_scan.begin() + beg_scan + 1);
@@ -226,7 +226,7 @@ void SketchContainerImpl<WQSketch>::AllreduceCategories(Context const* ctx, Meta
   CHECK_EQ(feature_ptr.front(), 0);
 
   // gather all feature ptrs from workers
-  std::vector<size_t> global_feat_ptrs(feature_ptr.size() * world_size, 0);
+  std::vector<bst_idx_t> global_feat_ptrs(feature_ptr.size() * world_size, 0);
   size_t feat_begin = rank * feature_ptr.size();  // pointer to current worker
   std::copy(feature_ptr.begin(), feature_ptr.end(), global_feat_ptrs.begin() + feat_begin);
   auto rc = collective::GlobalSum(
@@ -241,7 +241,7 @@ void SketchContainerImpl<WQSketch>::AllreduceCategories(Context const* ctx, Meta
   }
 
   // indptr for indexing workers
-  std::vector<size_t> global_worker_ptr(world_size + 1, 0);
+  std::vector<bst_idx_t> global_worker_ptr(world_size + 1, 0);
   global_worker_ptr[rank + 1] = total;  // shift 1 to right for constructing the indptr
   rc = collective::GlobalSum(ctx, info,
                              linalg::MakeVec(global_worker_ptr.data(), global_worker_ptr.size()));
@@ -298,14 +298,14 @@ void SketchContainerImpl<WQSketch>::AllReduce(
   reduced.resize(sketches_.size());
 
   // Prune the intermediate num cuts for synchronization.
-  std::vector<bst_row_t> global_column_size(columns_size_);
+  std::vector<bst_idx_t> global_column_size(columns_size_);
   auto rc = collective::GlobalSum(
       ctx, info, linalg::MakeVec(global_column_size.data(), global_column_size.size()));
   collective::SafeColl(rc);
 
   ParallelFor(sketches_.size(), n_threads_, [&](size_t i) {
     int32_t intermediate_num_cuts = static_cast<int32_t>(
-        std::min(global_column_size[i], static_cast<size_t>(max_bins_ * WQSketch::kFactor)));
+        std::min(global_column_size[i], static_cast<bst_idx_t>(max_bins_ * WQSketch::kFactor)));
     if (global_column_size[i] == 0) {
       return;
     }
@@ -327,8 +327,8 @@ void SketchContainerImpl<WQSketch>::AllReduce(
     return;
   }
 
-  std::vector<size_t> worker_segments(1, 0);  // CSC pointer to sketches.
-  std::vector<bst_row_t> sketches_scan((n_columns + 1) * world, 0);
+  std::vector<bst_idx_t> worker_segments(1, 0);  // CSC pointer to sketches.
+  std::vector<bst_idx_t> sketches_scan((n_columns + 1) * world, 0);
 
   std::vector<typename WQSketch::Entry> global_sketches;
   this->GatherSketchInfo(ctx, info, reduced, &worker_segments, &sketches_scan, &global_sketches);
@@ -452,11 +452,11 @@ template class SketchContainerImpl<WXQuantileSketch<float, float>>;
 
 HostSketchContainer::HostSketchContainer(Context const *ctx, bst_bin_t max_bins,
                                          common::Span<FeatureType const> ft,
-                                         std::vector<size_t> columns_size, bool use_group)
+                                         std::vector<bst_idx_t> columns_size, bool use_group)
     : SketchContainerImpl{ctx, columns_size, max_bins, ft, use_group} {
   monitor_.Init(__func__);
   ParallelFor(sketches_.size(), n_threads_, Sched::Auto(), [&](auto i) {
-    auto n_bins = std::min(static_cast<size_t>(max_bins_), columns_size_[i]);
+    auto n_bins = std::min(static_cast<bst_idx_t>(max_bins_), columns_size_[i]);
     n_bins = std::max(n_bins, static_cast<decltype(n_bins)>(1));
     auto eps = 1.0 / (static_cast<float>(n_bins) * WQSketch::kFactor);
     if (!IsCat(this->feature_types_, i)) {
diff --git a/src/common/quantile.cu b/src/common/quantile.cu
index 4b110f5e0164..b3dd770ab6a0 100644
--- a/src/common/quantile.cu
+++ b/src/common/quantile.cu
@@ -118,12 +118,12 @@ common::Span<thrust::tuple<uint64_t, uint64_t>> MergePath(
     Span<SketchEntry const> const &d_y, Span<bst_row_t const> const &y_ptr,
     Span<SketchEntry> out, Span<bst_row_t> out_ptr) {
   auto x_merge_key_it = thrust::make_zip_iterator(thrust::make_tuple(
-      dh::MakeTransformIterator<bst_row_t>(
+      dh::MakeTransformIterator<bst_idx_t>(
           thrust::make_counting_iterator(0ul),
           [=] __device__(size_t idx) { return dh::SegmentId(x_ptr, idx); }),
       d_x.data()));
   auto y_merge_key_it = thrust::make_zip_iterator(thrust::make_tuple(
-      dh::MakeTransformIterator<bst_row_t>(
+      dh::MakeTransformIterator<bst_idx_t>(
           thrust::make_counting_iterator(0ul),
           [=] __device__(size_t idx) { return dh::SegmentId(y_ptr, idx); }),
       d_y.data()));
diff --git a/src/common/quantile.cuh b/src/common/quantile.cuh
index f7124f079b6d..6a5a38613a3c 100644
--- a/src/common/quantile.cuh
+++ b/src/common/quantile.cuh
@@ -32,13 +32,13 @@ struct SketchUnique {
 class SketchContainer {
  public:
   static constexpr float kFactor = WQSketch::kFactor;
-  using OffsetT = bst_row_t;
+  using OffsetT = bst_idx_t;
   static_assert(sizeof(OffsetT) == sizeof(size_t), "Wrong type for sketch element offset.");
 
  private:
   Monitor timer_;
   HostDeviceVector<FeatureType> feature_types_;
-  bst_row_t num_rows_;
+  bst_idx_t num_rows_;
   bst_feature_t num_columns_;
   int32_t num_bins_;
   DeviceOrd device_;
@@ -94,7 +94,7 @@ class SketchContainer {
    * \param device      GPU ID.
    */
   SketchContainer(HostDeviceVector<FeatureType> const& feature_types, int32_t max_bin,
-                  bst_feature_t num_columns, bst_row_t num_rows, DeviceOrd device)
+                  bst_feature_t num_columns, bst_idx_t num_rows, DeviceOrd device)
       : num_rows_{num_rows}, num_columns_{num_columns}, num_bins_{max_bin}, device_{device} {
     CHECK(device.IsCUDA());
     // Initialize Sketches for this dmatrix
diff --git a/src/common/quantile.h b/src/common/quantile.h
index 0af93a03e021..59bc3a4f74b1 100644
--- a/src/common/quantile.h
+++ b/src/common/quantile.h
@@ -1,5 +1,5 @@
 /**
- * Copyright 2014-2023 by XGBoost Contributors
+ * Copyright 2014-2024, XGBoost Contributors
  * \file quantile.h
  * \brief util to compute quantiles
  * \author Tianqi Chen
@@ -701,12 +701,12 @@ inline std::vector<float> UnrollGroupWeights(MetaInfo const &info) {
   auto n_groups = group_ptr.size() - 1;
   CHECK_EQ(info.weights_.Size(), n_groups) << error::GroupWeight();
 
-  bst_row_t n_samples = info.num_row_;
+  bst_idx_t n_samples = info.num_row_;
   std::vector<float> results(n_samples);
   CHECK_EQ(group_ptr.back(), n_samples)
       << error::GroupSize() << " the number of rows from the data.";
   size_t cur_group = 0;
-  for (bst_row_t i = 0; i < n_samples; ++i) {
+  for (bst_idx_t i = 0; i < n_samples; ++i) {
     results[i] = group_weights[cur_group];
     if (i == group_ptr[cur_group + 1]) {
       cur_group++;
@@ -719,9 +719,9 @@ inline std::vector<float> UnrollGroupWeights(MetaInfo const &info) {
 class HistogramCuts;
 
 template <typename Batch, typename IsValid>
-std::vector<bst_row_t> CalcColumnSize(Batch const &batch, bst_feature_t const n_columns,
+std::vector<bst_idx_t> CalcColumnSize(Batch const &batch, bst_feature_t const n_columns,
                                       size_t const n_threads, IsValid &&is_valid) {
-  std::vector<std::vector<bst_row_t>> column_sizes_tloc(n_threads);
+  std::vector<std::vector<bst_idx_t>> column_sizes_tloc(n_threads);
   for (auto &column : column_sizes_tloc) {
     column.resize(n_columns, 0);
   }
@@ -759,7 +759,7 @@ std::vector<bst_feature_t> LoadBalance(Batch const &batch, size_t nnz, bst_featu
   size_t const entries_per_thread = DivRoundUp(total_entries, nthreads);
 
   // Need to calculate the size for each batch.
-  std::vector<bst_row_t> entries_per_columns = CalcColumnSize(batch, n_columns, nthreads, is_valid);
+  std::vector<bst_idx_t> entries_per_columns = CalcColumnSize(batch, n_columns, nthreads, is_valid);
   std::vector<bst_feature_t> cols_ptr(nthreads + 1, 0);
   size_t count{0};
   size_t current_thread{1};
@@ -791,8 +791,8 @@ class SketchContainerImpl {
   std::vector<std::set<float>> categories_;
   std::vector<FeatureType> const feature_types_;
 
-  std::vector<bst_row_t> columns_size_;
-  int32_t max_bins_;
+  std::vector<bst_idx_t> columns_size_;
+  bst_bin_t max_bins_;
   bool use_group_ind_{false};
   int32_t n_threads_;
   bool has_categorical_{false};
@@ -805,7 +805,7 @@ class SketchContainerImpl {
    * \param max_bins maximum number of bins for each feature.
    * \param use_group whether is assigned to group to data instance.
    */
-  SketchContainerImpl(Context const *ctx, std::vector<bst_row_t> columns_size, int32_t max_bins,
+  SketchContainerImpl(Context const *ctx, std::vector<bst_idx_t> columns_size, bst_bin_t max_bins,
                       common::Span<FeatureType const> feature_types, bool use_group);
 
   static bool UseGroup(MetaInfo const &info) {
@@ -829,8 +829,8 @@ class SketchContainerImpl {
   // Gather sketches from all workers.
   void GatherSketchInfo(Context const *ctx, MetaInfo const &info,
                         std::vector<typename WQSketch::SummaryContainer> const &reduced,
-                        std::vector<bst_row_t> *p_worker_segments,
-                        std::vector<bst_row_t> *p_sketches_scan,
+                        std::vector<bst_idx_t> *p_worker_segments,
+                        std::vector<bst_idx_t> *p_sketches_scan,
                         std::vector<typename WQSketch::Entry> *p_global_sketches);
   // Merge sketches from all workers.
   void AllReduce(Context const *ctx, MetaInfo const &info,
@@ -901,7 +901,7 @@ class HostSketchContainer : public SketchContainerImpl<WQuantileSketch<float, fl
 
  public:
   HostSketchContainer(Context const *ctx, bst_bin_t max_bins, common::Span<FeatureType const> ft,
-                      std::vector<size_t> columns_size, bool use_group);
+                      std::vector<bst_idx_t> columns_size, bool use_group);
 
   template <typename Batch>
   void PushAdapterBatch(Batch const &batch, size_t base_rowid, MetaInfo const &info, float missing);
@@ -998,7 +998,7 @@ class SortedSketchContainer : public SketchContainerImpl<WXQuantileSketch<float,
  public:
   explicit SortedSketchContainer(Context const *ctx, int32_t max_bins,
                                  common::Span<FeatureType const> ft,
-                                 std::vector<size_t> columns_size, bool use_group)
+                                 std::vector<bst_idx_t> columns_size, bool use_group)
       : SketchContainerImpl{ctx, columns_size, max_bins, ft, use_group} {
     monitor_.Init(__func__);
     sketches_.resize(columns_size.size());
diff --git a/src/data/adapter.h b/src/data/adapter.h
index e9a4ad9fc748..0ad1e9e3864c 100644
--- a/src/data/adapter.h
+++ b/src/data/adapter.h
@@ -73,11 +73,11 @@ constexpr size_t kAdapterUnknownSize = std::numeric_limits<size_t >::max();
 
 struct COOTuple {
   COOTuple() = default;
-  XGBOOST_DEVICE COOTuple(size_t row_idx, size_t column_idx, float value)
+  XGBOOST_DEVICE COOTuple(bst_idx_t row_idx, bst_idx_t column_idx, float value)
       : row_idx(row_idx), column_idx(column_idx), value(value) {}
 
-  size_t row_idx{0};
-  size_t column_idx{0};
+  bst_idx_t row_idx{0};
+  bst_idx_t column_idx{0};
   float value{0};
 };
 
@@ -136,12 +136,8 @@ class CSRAdapterBatch : public detail::NoMetaInfo {
  public:
   class Line {
    public:
-    Line(size_t row_idx, size_t size, const unsigned* feature_idx,
-         const float* values)
-        : row_idx_(row_idx),
-          size_(size),
-          feature_idx_(feature_idx),
-          values_(values) {}
+    Line(bst_idx_t row_idx, bst_idx_t size, const unsigned* feature_idx, const float* values)
+        : row_idx_(row_idx), size_(size), feature_idx_(feature_idx), values_(values) {}
 
     size_t Size() const { return size_; }
     COOTuple GetElement(size_t idx) const {
@@ -149,8 +145,8 @@ class CSRAdapterBatch : public detail::NoMetaInfo {
     }
 
    private:
-    size_t row_idx_;
-    size_t size_;
+    bst_idx_t row_idx_;
+    bst_idx_t size_;
     const unsigned* feature_idx_;
     const float* values_;
   };
@@ -178,29 +174,25 @@ class CSRAdapterBatch : public detail::NoMetaInfo {
 
 class CSRAdapter : public detail::SingleBatchDataIter<CSRAdapterBatch> {
  public:
-  CSRAdapter(const size_t* row_ptr, const unsigned* feature_idx,
-             const float* values, size_t num_rows, size_t num_elements,
-             size_t num_features)
-      : batch_(row_ptr, feature_idx, values, num_rows, num_elements,
-               num_features),
+  CSRAdapter(const size_t* row_ptr, const unsigned* feature_idx, const float* values,
+             bst_idx_t num_rows, bst_idx_t num_elements, size_t num_features)
+      : batch_(row_ptr, feature_idx, values, num_rows, num_elements, num_features),
         num_rows_(num_rows),
         num_columns_(num_features) {}
   const CSRAdapterBatch& Value() const override { return batch_; }
-  size_t NumRows() const { return num_rows_; }
-  size_t NumColumns() const { return num_columns_; }
+  bst_idx_t NumRows() const { return num_rows_; }
+  bst_idx_t NumColumns() const { return num_columns_; }
 
  private:
   CSRAdapterBatch batch_;
-  size_t num_rows_;
-  size_t num_columns_;
+  bst_idx_t num_rows_;
+  bst_idx_t num_columns_;
 };
 
 class DenseAdapterBatch : public detail::NoMetaInfo {
  public:
-  DenseAdapterBatch(const float* values, size_t num_rows, size_t num_features)
-      : values_(values),
-        num_rows_(num_rows),
-        num_features_(num_features) {}
+  DenseAdapterBatch(const float* values, bst_idx_t num_rows, bst_idx_t num_features)
+      : values_(values), num_rows_(num_rows), num_features_(num_features) {}
 
  private:
   class Line {
@@ -910,7 +902,7 @@ class SparsePageAdapterBatch {
   struct Line {
     Entry const* inst;
     size_t n;
-    bst_row_t ridx;
+    bst_idx_t ridx;
     COOTuple GetElement(size_t idx) const { return {ridx, inst[idx].index, inst[idx].fvalue}; }
     size_t Size() const { return n; }
   };
diff --git a/src/data/data.cc b/src/data/data.cc
index 24b41640c173..b6ecd4db74c2 100644
--- a/src/data/data.cc
+++ b/src/data/data.cc
@@ -996,7 +996,7 @@ template DMatrix* DMatrix::Create(
 
 SparsePage SparsePage::GetTranspose(int num_columns, int32_t n_threads) const {
   SparsePage transpose;
-  common::ParallelGroupBuilder<Entry, bst_row_t> builder(&transpose.offset.HostVector(),
+  common::ParallelGroupBuilder<Entry, bst_idx_t> builder(&transpose.offset.HostVector(),
                                                          &transpose.data.HostVector());
   builder.InitBudget(num_columns, n_threads);
   long batch_size = static_cast<long>(this->Size());  // NOLINT(*)
@@ -1192,7 +1192,7 @@ uint64_t SparsePage::Push(const AdapterBatchT& batch, float missing, int nthread
 
 void SparsePage::PushCSC(const SparsePage &batch) {
   std::vector<xgboost::Entry>& self_data = data.HostVector();
-  std::vector<bst_row_t>& self_offset = offset.HostVector();
+  std::vector<bst_idx_t>& self_offset = offset.HostVector();
 
   auto const& other_data = batch.data.ConstHostVector();
   auto const& other_offset = batch.offset.ConstHostVector();
@@ -1211,7 +1211,7 @@ void SparsePage::PushCSC(const SparsePage &batch) {
     return;
   }
 
-  std::vector<bst_row_t> offset(other_offset.size());
+  std::vector<bst_idx_t> offset(other_offset.size());
   offset[0] = 0;
 
   std::vector<xgboost::Entry> data(self_data.size() + other_data.size());
diff --git a/src/data/device_adapter.cuh b/src/data/device_adapter.cuh
index a5156f585441..2dbc2b1e08ab 100644
--- a/src/data/device_adapter.cuh
+++ b/src/data/device_adapter.cuh
@@ -39,7 +39,7 @@ class CudfAdapterBatch : public detail::NoMetaInfo {
     return {row_idx, column_idx, value};
   }
 
-  [[nodiscard]] __device__ float GetElement(bst_row_t ridx, bst_feature_t fidx) const {
+  [[nodiscard]] __device__ float GetElement(bst_idx_t ridx, bst_feature_t fidx) const {
     auto const& column = columns_[fidx];
     float value = column.valid.Data() == nullptr || column.valid.Check(ridx)
                       ? column(ridx)
@@ -47,8 +47,8 @@ class CudfAdapterBatch : public detail::NoMetaInfo {
     return value;
   }
 
-  [[nodiscard]] XGBOOST_DEVICE bst_row_t NumRows() const { return num_rows_; }
-  [[nodiscard]] XGBOOST_DEVICE bst_row_t NumCols() const { return columns_.size(); }
+  [[nodiscard]] XGBOOST_DEVICE bst_idx_t NumRows() const { return num_rows_; }
+  [[nodiscard]] XGBOOST_DEVICE bst_idx_t NumCols() const { return columns_.size(); }
 
  private:
   common::Span<ArrayInterface<1>> columns_;
@@ -168,13 +168,13 @@ class CupyAdapterBatch : public detail::NoMetaInfo {
     float value = array_interface_(row_idx, column_idx);
     return {row_idx, column_idx, value};
   }
-  [[nodiscard]] __device__ float GetElement(bst_row_t ridx, bst_feature_t fidx) const {
+  [[nodiscard]] __device__ float GetElement(bst_idx_t ridx, bst_feature_t fidx) const {
     float value = array_interface_(ridx, fidx);
     return value;
   }
 
-  [[nodiscard]] XGBOOST_DEVICE bst_row_t NumRows() const { return array_interface_.Shape(0); }
-  [[nodiscard]] XGBOOST_DEVICE bst_row_t NumCols() const { return array_interface_.Shape(1); }
+  [[nodiscard]] XGBOOST_DEVICE bst_idx_t NumRows() const { return array_interface_.Shape(0); }
+  [[nodiscard]] XGBOOST_DEVICE bst_idx_t NumCols() const { return array_interface_.Shape(1); }
 
  private:
   ArrayInterface<2> array_interface_;
@@ -231,7 +231,7 @@ std::size_t GetRowCounts(const AdapterBatchT batch, common::Span<bst_row_t> offs
 
   // Count elements per row
   dh::LaunchN(n_samples * stride, [=] __device__(std::size_t idx) {
-    bst_row_t cnt{0};
+    bst_idx_t cnt{0};
     auto [ridx, fbeg] = linalg::UnravelIndex(idx, n_samples, stride);
     SPAN_CHECK(ridx < n_samples);
     for (bst_feature_t fidx = fbeg; fidx < n_features; fidx += stride) {
@@ -245,10 +245,10 @@ std::size_t GetRowCounts(const AdapterBatchT batch, common::Span<bst_row_t> offs
               static_cast<unsigned long long>(cnt));  // NOLINT
   });
   dh::XGBCachingDeviceAllocator<char> alloc;
-  bst_row_t row_stride =
+  bst_idx_t row_stride =
       dh::Reduce(thrust::cuda::par(alloc), thrust::device_pointer_cast(offset.data()),
                  thrust::device_pointer_cast(offset.data()) + offset.size(),
-                 static_cast<bst_row_t>(0), thrust::maximum<bst_row_t>());
+                 static_cast<bst_idx_t>(0), thrust::maximum<bst_idx_t>());
   return row_stride;
 }
 
diff --git a/src/data/gradient_index.cc b/src/data/gradient_index.cc
index 88a38d5cce74..493aded70098 100644
--- a/src/data/gradient_index.cc
+++ b/src/data/gradient_index.cc
@@ -193,7 +193,7 @@ float GHistIndexMatrix::GetFvalue(size_t ridx, size_t fidx, bool is_cat) const {
 
 float GHistIndexMatrix::GetFvalue(std::vector<std::uint32_t> const &ptrs,
                                   std::vector<float> const &values, std::vector<float> const &mins,
-                                  bst_row_t ridx, bst_feature_t fidx, bool is_cat) const {
+                                  bst_idx_t ridx, bst_feature_t fidx, bool is_cat) const {
   if (is_cat) {
     auto gidx = GetGindex(ridx, fidx);
     if (gidx == -1) {
diff --git a/src/data/gradient_index.h b/src/data/gradient_index.h
index 0bb93fc20900..f1754fe35121 100644
--- a/src/data/gradient_index.h
+++ b/src/data/gradient_index.h
@@ -149,7 +149,7 @@ class GHistIndexMatrix {
   /** @brief max_bin for each feature. */
   bst_bin_t max_numeric_bins_per_feat;
   /** @brief base row index for current page (used by external memory) */
-  bst_row_t base_rowid{0};
+  bst_idx_t base_rowid{0};
 
   [[nodiscard]] bst_bin_t MaxNumBinPerFeat() const {
     return std::max(static_cast<bst_bin_t>(cut.MaxCategory() + 1), max_numeric_bins_per_feat);
@@ -230,7 +230,7 @@ class GHistIndexMatrix {
    */
   [[nodiscard]] std::size_t RowIdx(size_t ridx) const { return row_ptr[ridx - base_rowid]; }
 
-  [[nodiscard]] bst_row_t Size() const { return row_ptr.empty() ? 0 : row_ptr.size() - 1; }
+  [[nodiscard]] bst_idx_t Size() const { return row_ptr.empty() ? 0 : row_ptr.size() - 1; }
   [[nodiscard]] bst_feature_t Features() const { return cut.Ptrs().size() - 1; }
 
   [[nodiscard]] bool ReadColumnPage(common::AlignedResourceReadStream* fi);
@@ -243,7 +243,7 @@ class GHistIndexMatrix {
   [[nodiscard]] float GetFvalue(size_t ridx, size_t fidx, bool is_cat) const;
   [[nodiscard]] float GetFvalue(std::vector<std::uint32_t> const& ptrs,
                                 std::vector<float> const& values, std::vector<float> const& mins,
-                                bst_row_t ridx, bst_feature_t fidx, bool is_cat) const;
+                                bst_idx_t ridx, bst_feature_t fidx, bool is_cat) const;
 
   [[nodiscard]] common::HistogramCuts& Cuts() { return cut; }
   [[nodiscard]] common::HistogramCuts const& Cuts() const { return cut; }
diff --git a/src/data/iterative_dmatrix.cc b/src/data/iterative_dmatrix.cc
index e5aa98278c8e..0d75d0651e26 100644
--- a/src/data/iterative_dmatrix.cc
+++ b/src/data/iterative_dmatrix.cc
@@ -132,7 +132,7 @@ void IterativeDMatrix::InitFromCPU(Context const* ctx, BatchParam const& p,
     return HostAdapterDispatch(proxy, [](auto const& value) { return value.NumCols(); });
   };
 
-  std::vector<std::size_t> column_sizes;
+  std::vector<bst_idx_t> column_sizes;
   auto const is_valid = data::IsValidFunctor{missing};
   auto nnz_cnt = [&]() {
     return HostAdapterDispatch(proxy, [&](auto const& value) {
diff --git a/src/data/simple_dmatrix.cc b/src/data/simple_dmatrix.cc
index 99bf67ba0c86..4df1d5e53738 100644
--- a/src/data/simple_dmatrix.cc
+++ b/src/data/simple_dmatrix.cc
@@ -59,7 +59,7 @@ DMatrix* SimpleDMatrix::SliceCol(int num_slices, int slice_id) {
     auto& h_data = out_page.data.HostVector();
     auto& h_offset = out_page.offset.HostVector();
     size_t rptr{0};
-    for (bst_row_t i = 0; i < this->Info().num_row_; i++) {
+    for (bst_idx_t i = 0; i < this->Info().num_row_; i++) {
       auto inst = batch[i];
       auto prev_size = h_data.size();
       std::copy_if(inst.begin(), inst.end(), std::back_inserter(h_data),
diff --git a/src/predictor/cpu_predictor.cc b/src/predictor/cpu_predictor.cc
index 06b8079ee134..8dbd69a9b4bb 100644
--- a/src/predictor/cpu_predictor.cc
+++ b/src/predictor/cpu_predictor.cc
@@ -184,7 +184,7 @@ void FVecDrop(std::size_t const block_size, std::size_t const fvec_offset,
 static std::size_t constexpr kUnroll = 8;
 
 struct SparsePageView {
-  bst_row_t base_rowid;
+  bst_idx_t base_rowid;
   HostSparsePageView view;
 
   explicit SparsePageView(SparsePage const *p) : base_rowid{p->base_rowid} { view = p->GetView(); }
@@ -193,7 +193,7 @@ struct SparsePageView {
 };
 
 struct SingleInstanceView {
-  bst_row_t base_rowid{};
+  bst_idx_t base_rowid{};
   SparsePage::Inst const &inst;
 
   explicit SingleInstanceView(SparsePage::Inst const &instance) : inst{instance} {}
@@ -292,7 +292,7 @@ class AdapterView {
 
   [[nodiscard]] size_t Size() const { return adapter_->NumRows(); }
 
-  bst_row_t const static base_rowid = 0;  // NOLINT
+  bst_idx_t const static base_rowid = 0;  // NOLINT
 };
 
 template <typename DataView, size_t block_of_rows_size>
diff --git a/src/predictor/gpu_predictor.cu b/src/predictor/gpu_predictor.cu
index 7dcb5b5fc0f8..29fbae870270 100644
--- a/src/predictor/gpu_predictor.cu
+++ b/src/predictor/gpu_predictor.cu
@@ -113,7 +113,7 @@ struct SparsePageLoader {
   float* smem;
 
   __device__ SparsePageLoader(SparsePageView data, bool use_shared, bst_feature_t num_features,
-                              bst_row_t num_rows, size_t entry_start, float)
+                              bst_idx_t num_rows, size_t entry_start, float)
       : use_shared(use_shared),
         data(data) {
     extern __shared__ float _smem[];
@@ -146,7 +146,7 @@ struct SparsePageLoader {
 
 struct EllpackLoader {
   EllpackDeviceAccessor const& matrix;
-  XGBOOST_DEVICE EllpackLoader(EllpackDeviceAccessor const& m, bool, bst_feature_t, bst_row_t,
+  XGBOOST_DEVICE EllpackLoader(EllpackDeviceAccessor const& m, bool, bst_feature_t, bst_idx_t,
                                size_t, float)
       : matrix{m} {}
   [[nodiscard]] __device__ __forceinline__ float GetElement(size_t ridx, size_t fidx) const {
@@ -177,7 +177,7 @@ struct DeviceAdapterLoader {
   using BatchT = Batch;
 
   XGBOOST_DEV_INLINE DeviceAdapterLoader(Batch const batch, bool use_shared,
-                                         bst_feature_t num_features, bst_row_t num_rows,
+                                         bst_feature_t num_features, bst_idx_t num_rows,
                                          size_t entry_start, float missing)
       : batch{batch}, columns{num_features}, use_shared{use_shared}, is_valid{missing} {
     extern __shared__ float _smem[];
@@ -215,7 +215,7 @@ struct DeviceAdapterLoader {
 };
 
 template <bool has_missing, bool has_categorical, typename Loader>
-__device__ bst_node_t GetLeafIndex(bst_row_t ridx, TreeView const &tree,
+__device__ bst_node_t GetLeafIndex(bst_idx_t ridx, TreeView const &tree,
                                    Loader *loader) {
   bst_node_t nidx = 0;
   RegTree::Node n = tree.d_tree[nidx];
@@ -230,7 +230,7 @@ __device__ bst_node_t GetLeafIndex(bst_row_t ridx, TreeView const &tree,
 }
 
 template <bool has_missing, typename Loader>
-__device__ float GetLeafWeight(bst_row_t ridx, TreeView const &tree,
+__device__ float GetLeafWeight(bst_idx_t ridx, TreeView const &tree,
                                Loader *loader) {
   bst_node_t nidx = -1;
   if (tree.HasCategoricalSplit()) {
@@ -255,7 +255,7 @@ PredictLeafKernel(Data data, common::Span<const RegTree::Node> d_nodes,
                   size_t tree_begin, size_t tree_end, size_t num_features,
                   size_t num_rows, size_t entry_start, bool use_shared,
                   float missing) {
-  bst_row_t ridx = blockDim.x * blockIdx.x + threadIdx.x;
+  bst_idx_t ridx = blockDim.x * blockIdx.x + threadIdx.x;
   if (ridx >= num_rows) {
     return;
   }
diff --git a/src/predictor/predictor.cc b/src/predictor/predictor.cc
index 019804eda31c..1b2e9137cc3b 100644
--- a/src/predictor/predictor.cc
+++ b/src/predictor/predictor.cc
@@ -34,7 +34,7 @@ Predictor* Predictor::Create(std::string const& name, Context const* ctx) {
 }
 
 template <int32_t D>
-void ValidateBaseMarginShape(linalg::Tensor<float, D> const& margin, bst_row_t n_samples,
+void ValidateBaseMarginShape(linalg::Tensor<float, D> const& margin, bst_idx_t n_samples,
                              bst_group_t n_groups) {
   // FIXME: Bindings other than Python doesn't have shape.
   std::string expected{"Invalid shape of base_margin. Expected: (" + std::to_string(n_samples) +
diff --git a/src/tree/common_row_partitioner.h b/src/tree/common_row_partitioner.h
index 4360c0b1314e..293e7d1d486d 100644
--- a/src/tree/common_row_partitioner.h
+++ b/src/tree/common_row_partitioner.h
@@ -28,7 +28,7 @@ class ColumnSplitHelper {
  public:
   ColumnSplitHelper() = default;
 
-  ColumnSplitHelper(bst_row_t num_row,
+  ColumnSplitHelper(bst_idx_t num_row,
                     common::PartitionBuilder<kPartitionBlockSize>* partition_builder,
                     common::RowSetCollection* row_set_collection)
       : partition_builder_{partition_builder}, row_set_collection_{row_set_collection} {
@@ -85,10 +85,10 @@ class ColumnSplitHelper {
 
 class CommonRowPartitioner {
  public:
-  bst_row_t base_rowid = 0;
+  bst_idx_t base_rowid = 0;
 
   CommonRowPartitioner() = default;
-  CommonRowPartitioner(Context const* ctx, bst_row_t num_row, bst_row_t _base_rowid,
+  CommonRowPartitioner(Context const* ctx, bst_idx_t num_row, bst_idx_t _base_rowid,
                        bool is_col_split)
       : base_rowid{_base_rowid}, is_col_split_{is_col_split} {
     row_set_collection_.Clear();
diff --git a/src/tree/gpu_hist/gradient_based_sampler.cu b/src/tree/gpu_hist/gradient_based_sampler.cu
index 58add0a9354f..718474a3e87a 100644
--- a/src/tree/gpu_hist/gradient_based_sampler.cu
+++ b/src/tree/gpu_hist/gradient_based_sampler.cu
@@ -277,7 +277,7 @@ GradientBasedSample ExternalMemoryGradientBasedSampling::Sample(Context const* c
                                                                 common::Span<GradientPair> gpair,
                                                                 DMatrix* dmat) {
   auto cuctx = ctx->CUDACtx();
-  bst_row_t n_rows = dmat->Info().num_row_;
+  bst_idx_t n_rows = dmat->Info().num_row_;
   size_t threshold_index = GradientBasedSampler::CalculateThresholdIndex(
       gpair, dh::ToSpan(threshold_), dh::ToSpan(grad_sum_), n_rows * subsample_);
 
diff --git a/src/tree/hist/sampler.h b/src/tree/hist/sampler.h
index 803e40d547bf..11b4ac1c6f16 100644
--- a/src/tree/hist/sampler.h
+++ b/src/tree/hist/sampler.h
@@ -54,7 +54,7 @@ inline void SampleGradient(Context const* ctx, TrainParam param,
   if (param.subsample >= 1.0) {
     return;
   }
-  bst_row_t n_samples = out.Shape(0);
+  bst_idx_t n_samples = out.Shape(0);
   auto& rnd = common::GlobalRandom();
 
 #if XGBOOST_CUSTOMIZE_GLOBAL_PRNG
diff --git a/src/tree/updater_gpu_hist.cu b/src/tree/updater_gpu_hist.cu
index 4911cec093c8..d53a25d17399 100644
--- a/src/tree/updater_gpu_hist.cu
+++ b/src/tree/updater_gpu_hist.cu
@@ -191,7 +191,7 @@ struct GPUHistMakerDevice {
   std::unique_ptr<FeatureGroups> feature_groups;
 
   GPUHistMakerDevice(Context const* ctx, bool is_external_memory,
-                     common::Span<FeatureType const> _feature_types, bst_row_t _n_rows,
+                     common::Span<FeatureType const> _feature_types, bst_idx_t _n_rows,
                      TrainParam _param, std::shared_ptr<common::ColumnSampler> column_sampler,
                      uint32_t n_features, BatchParam batch_param, MetaInfo const& info)
       : evaluator_{_param, n_features, ctx->Device()},
diff --git a/tests/cpp/c_api/test_c_api.cc b/tests/cpp/c_api/test_c_api.cc
index c4c1f0c45f42..8729eba82fc3 100644
--- a/tests/cpp/c_api/test_c_api.cc
+++ b/tests/cpp/c_api/test_c_api.cc
@@ -434,7 +434,7 @@ void MakeLabelForTest(std::shared_ptr<DMatrix> Xy, DMatrixHandle cxy) {
   XGDMatrixSetInfoFromInterface(cxy, "label", s_y_int.c_str());
 }
 
-auto MakeSimpleDMatrixForTest(bst_row_t n_samples, bst_feature_t n_features, Json dconfig) {
+auto MakeSimpleDMatrixForTest(bst_idx_t n_samples, bst_feature_t n_features, Json dconfig) {
   HostDeviceVector<float> storage;
   auto arr_int = RandomDataGenerator{n_samples, n_features, 0.5f}.GenerateArrayInterface(&storage);
 
@@ -451,7 +451,7 @@ auto MakeSimpleDMatrixForTest(bst_row_t n_samples, bst_feature_t n_features, Jso
   return std::pair{p_fmat, Xy};
 }
 
-auto MakeQDMForTest(Context const *ctx, bst_row_t n_samples, bst_feature_t n_features,
+auto MakeQDMForTest(Context const *ctx, bst_idx_t n_samples, bst_feature_t n_features,
                     Json dconfig) {
   bst_bin_t n_bins{16};
   dconfig["max_bin"] = Integer{n_bins};
@@ -483,7 +483,7 @@ auto MakeQDMForTest(Context const *ctx, bst_row_t n_samples, bst_feature_t n_fea
   return std::pair{p_fmat, Xy};
 }
 
-auto MakeExtMemForTest(bst_row_t n_samples, bst_feature_t n_features, Json dconfig) {
+auto MakeExtMemForTest(bst_idx_t n_samples, bst_feature_t n_features, Json dconfig) {
   std::size_t n_batches{4};
   NumpyArrayIterForTest iter_0{0.0f, n_samples, n_features, n_batches};
   std::string s_dconfig;
@@ -525,7 +525,7 @@ void CheckResult(Context const *ctx, bst_feature_t n_features, std::shared_ptr<D
 }
 
 void TestXGDMatrixGetQuantileCut(Context const *ctx) {
-  bst_row_t n_samples{1024};
+  bst_idx_t n_samples{1024};
   bst_feature_t n_features{16};
 
   Json dconfig{Object{}};
diff --git a/tests/cpp/common/test_hist_util.cu b/tests/cpp/common/test_hist_util.cu
index c0d5c5ddc109..624c5b35d591 100644
--- a/tests/cpp/common/test_hist_util.cu
+++ b/tests/cpp/common/test_hist_util.cu
@@ -179,7 +179,7 @@ void TestMixedSketch() {
 TEST(HistUtil, DeviceSketchMixedFeatures) { TestMixedSketch(); }
 
 TEST(HistUtil, RemoveDuplicatedCategories) {
-  bst_row_t n_samples = 512;
+  bst_idx_t n_samples = 512;
   bst_feature_t n_features = 3;
   bst_cat_t n_categories = 5;
 
@@ -208,7 +208,7 @@ TEST(HistUtil, RemoveDuplicatedCategories) {
       FeatureType::kNumerical, FeatureType::kCategorical, FeatureType::kNumerical};
   ASSERT_EQ(info.feature_types.Size(), n_features);
 
-  HostDeviceVector<bst_row_t> cuts_ptr{0, n_samples, n_samples * 2, n_samples * 3};
+  HostDeviceVector<bst_idx_t> cuts_ptr{0, n_samples, n_samples * 2, n_samples * 3};
   cuts_ptr.SetDevice(DeviceOrd::CUDA(0));
 
   dh::device_vector<float> weight(n_samples * n_features, 0);
@@ -639,7 +639,7 @@ void TestGetColumnSize(std::size_t n_samples) {
 }  // namespace
 
 TEST(HistUtil, GetColumnSize) {
-  bst_row_t n_samples = 4096;
+  bst_idx_t n_samples = 4096;
   TestGetColumnSize(n_samples);
 }
 
@@ -799,7 +799,7 @@ class DeviceSketchWithHessianTest
   bst_feature_t n_features_ = 5;
   bst_group_t n_groups_{3};
 
-  auto GenerateHessian(Context const* ctx, bst_row_t n_samples) const {
+  auto GenerateHessian(Context const* ctx, bst_idx_t n_samples) const {
     HostDeviceVector<float> hessian;
     auto& h_hess = hessian.HostVector();
     h_hess = GenerateRandomWeights(n_samples);
@@ -844,7 +844,7 @@ class DeviceSketchWithHessianTest
  protected:
   Context ctx_ = MakeCUDACtx(0);
 
-  void TestLTR(Context const* ctx, bst_row_t n_samples, bst_bin_t n_bins,
+  void TestLTR(Context const* ctx, bst_idx_t n_samples, bst_bin_t n_bins,
                std::size_t n_elements) const {
     auto x = GenerateRandom(n_samples, n_features_);
 
@@ -897,7 +897,7 @@ class DeviceSketchWithHessianTest
     }
   }
 
-  void TestRegression(Context const* ctx, bst_row_t n_samples, bst_bin_t n_bins,
+  void TestRegression(Context const* ctx, bst_idx_t n_samples, bst_bin_t n_bins,
                       std::size_t n_elements) const {
     auto x = GenerateRandom(n_samples, n_features_);
     auto p_fmat = GetDMatrixFromData(x, n_samples, n_features_);
diff --git a/tests/cpp/common/test_quantile.cc b/tests/cpp/common/test_quantile.cc
index 9fa1566ea130..26937be76a9a 100644
--- a/tests/cpp/common/test_quantile.cc
+++ b/tests/cpp/common/test_quantile.cc
@@ -50,7 +50,7 @@ void DoTestDistributedQuantile(size_t rows, size_t cols) {
   SimpleLCG lcg;
   SimpleRealUniformDistribution<float> dist(3, 1000);
   std::generate(h_weights.begin(), h_weights.end(), [&]() { return dist(&lcg); });
-  std::vector<bst_row_t> column_size(cols, rows);
+  std::vector<bst_idx_t> column_size(cols, rows);
   bst_bin_t n_bins = 64;
 
   // Generate cuts for distributed environment.
@@ -192,7 +192,7 @@ void DoTestColSplitQuantile(size_t rows, size_t cols) {
     return dmat->SliceCol(world, rank);
   }()};
 
-  std::vector<bst_row_t> column_size(cols, 0);
+  std::vector<bst_idx_t> column_size(cols, 0);
   auto const slice_size = cols / world;
   auto const slice_start = slice_size * rank;
   auto const slice_end = (rank == world - 1) ? cols : slice_start + slice_size;
diff --git a/tests/cpp/data/test_adapter.cc b/tests/cpp/data/test_adapter.cc
index fa3ed61f6808..f34cfceed2f3 100644
--- a/tests/cpp/data/test_adapter.cc
+++ b/tests/cpp/data/test_adapter.cc
@@ -36,7 +36,7 @@ TEST(Adapter, CSRAdapter) {
 }
 
 TEST(Adapter, CSRArrayAdapter) {
-  HostDeviceVector<bst_row_t> indptr;
+  HostDeviceVector<std::size_t> indptr;
   HostDeviceVector<float> values;
   HostDeviceVector<bst_feature_t> indices;
   size_t n_features = 100, n_samples = 10;
@@ -155,7 +155,7 @@ TEST(Adapter, IteratorAdapter) {
   ASSERT_EQ(data->Info().num_row_, kRows);
   int num_batch = 0;
   for (auto const& batch : data->GetBatches<SparsePage>()) {
-    ASSERT_EQ(batch.offset.HostVector(), std::vector<bst_row_t>({0, 2, 4, 5, 5, 7, 9, 10, 10}));
+    ASSERT_EQ(batch.offset.HostVector(), std::vector<bst_idx_t>({0, 2, 4, 5, 5, 7, 9, 10, 10}));
     ++num_batch;
   }
   ASSERT_EQ(num_batch, 1);
diff --git a/tests/cpp/data/test_data.cc b/tests/cpp/data/test_data.cc
index 99cd72cc09a0..f9e34790d4a3 100644
--- a/tests/cpp/data/test_data.cc
+++ b/tests/cpp/data/test_data.cc
@@ -13,7 +13,7 @@
 
 namespace xgboost {
 TEST(SparsePage, PushCSC) {
-  std::vector<bst_row_t> offset {0};
+  std::vector<bst_idx_t> offset {0};
   std::vector<Entry> data;
   SparsePage batch;
   batch.offset.HostVector() = offset;
diff --git a/tests/cpp/data/test_metainfo.cc b/tests/cpp/data/test_metainfo.cc
index 9229832c0eba..0e63ab8f8878 100644
--- a/tests/cpp/data/test_metainfo.cc
+++ b/tests/cpp/data/test_metainfo.cc
@@ -231,7 +231,7 @@ TEST(MetaInfo, LoadQid) {
   const std::vector<xgboost::bst_uint> expected_group_ptr{0, 4, 8, 12};
   CHECK(info.group_ptr_ == expected_group_ptr);
 
-  const std::vector<xgboost::bst_row_t> expected_offset{
+  const std::vector<xgboost::bst_idx_t> expected_offset{
     0, 5, 10, 15, 20, 25, 30, 35, 40, 45, 50, 55, 60
   };
   const std::vector<xgboost::Entry> expected_data{
diff --git a/tests/cpp/data/test_simple_dmatrix.cc b/tests/cpp/data/test_simple_dmatrix.cc
index fa4165796bd9..6334d96c6d17 100644
--- a/tests/cpp/data/test_simple_dmatrix.cc
+++ b/tests/cpp/data/test_simple_dmatrix.cc
@@ -223,7 +223,7 @@ TEST(SimpleDMatrix, FromFile) {
     auto batch = page.GetView();
     EXPECT_EQ(batch.Size(), kExpectedNumRow);
     EXPECT_EQ(page.offset.HostVector(),
-              std::vector<bst_row_t>({0, 3, 6, 9, 12, 15, 15}));
+              std::vector<bst_idx_t>({0, 3, 6, 9, 12, 15, 15}));
     EXPECT_EQ(page.base_rowid, 0);
 
     for (auto i = 0ull; i < batch.Size() - 1; i++) {
diff --git a/tests/cpp/gbm/test_gbtree.cc b/tests/cpp/gbm/test_gbtree.cc
index dac1f1cf7458..8f15880778a4 100644
--- a/tests/cpp/gbm/test_gbtree.cc
+++ b/tests/cpp/gbm/test_gbtree.cc
@@ -171,7 +171,7 @@ TEST(GBTree, ChoosePredictor) {
 }
 
 TEST(GBTree, ChooseTreeMethod) {
-  bst_row_t n_samples{128};
+  bst_idx_t n_samples{128};
   bst_feature_t n_features{64};
   auto Xy = RandomDataGenerator{n_samples, n_features, 0.5f}.GenerateDMatrix(true);
 
diff --git a/tests/cpp/gbm/test_gbtree.cu b/tests/cpp/gbm/test_gbtree.cu
index f308e3b3ea36..227e07ffd3fd 100644
--- a/tests/cpp/gbm/test_gbtree.cu
+++ b/tests/cpp/gbm/test_gbtree.cu
@@ -18,7 +18,7 @@
 namespace xgboost {
 void TestInplaceFallback(Context const* ctx) {
   // prepare data
-  bst_row_t n_samples{1024};
+  bst_idx_t n_samples{1024};
   bst_feature_t n_features{32};
   HostDeviceVector<float> X_storage;
   // use a different device than the learner
diff --git a/tests/cpp/helpers.cc b/tests/cpp/helpers.cc
index 6ce362f46763..dc99f76aaad8 100644
--- a/tests/cpp/helpers.cc
+++ b/tests/cpp/helpers.cc
@@ -216,7 +216,7 @@ SimpleLCG::StateType SimpleLCG::Max() const { return max(); }
 static_assert(SimpleLCG::max() - SimpleLCG::min());
 
 void RandomDataGenerator::GenerateLabels(std::shared_ptr<DMatrix> p_fmat) const {
-  RandomDataGenerator{static_cast<bst_row_t>(p_fmat->Info().num_row_), this->n_targets_, 0.0f}.GenerateDense(
+  RandomDataGenerator{static_cast<bst_idx_t>(p_fmat->Info().num_row_), this->n_targets_, 0.0f}.GenerateDense(
       p_fmat->Info().labels.Data());
   CHECK_EQ(p_fmat->Info().labels.Size(), this->rows_ * this->n_targets_);
   p_fmat->Info().labels.Reshape(this->rows_, this->n_targets_);
@@ -334,7 +334,7 @@ std::string RandomDataGenerator::GenerateColumnarArrayInterface(
 }
 
 void RandomDataGenerator::GenerateCSR(
-    HostDeviceVector<float>* value, HostDeviceVector<bst_row_t>* row_ptr,
+    HostDeviceVector<float>* value, HostDeviceVector<std::size_t>* row_ptr,
     HostDeviceVector<bst_feature_t>* columns) const {
   auto& h_value = value->HostVector();
   auto& h_rptr = row_ptr->HostVector();
@@ -381,7 +381,7 @@ void RandomDataGenerator::GenerateCSR(
 [[nodiscard]] std::shared_ptr<DMatrix> RandomDataGenerator::GenerateDMatrix(
     bool with_label, bool float_label, size_t classes, DataSplitMode data_split_mode) const {
   HostDeviceVector<float> data;
-  HostDeviceVector<bst_row_t> rptrs;
+  HostDeviceVector<std::size_t> rptrs;
   HostDeviceVector<bst_feature_t> columns;
   this->GenerateCSR(&data, &rptrs, &columns);
   data::CSRAdapter adapter(rptrs.HostPointer(), columns.HostPointer(), data.HostPointer(), rows_,
@@ -447,7 +447,7 @@ void RandomDataGenerator::GenerateCSR(
 
   // Loop over the batches and count the number of pages
   std::size_t batch_count = 0;
-  bst_row_t row_count = 0;
+  bst_idx_t row_count = 0;
   for (const auto& batch : dmat->GetBatches<xgboost::SparsePage>()) {
     batch_count++;
     row_count += batch.Size();
@@ -458,7 +458,7 @@ void RandomDataGenerator::GenerateCSR(
   EXPECT_EQ(row_count, dmat->Info().num_row_);
 
   if (with_label) {
-    RandomDataGenerator{static_cast<bst_row_t>(dmat->Info().num_row_), this->n_targets_, 0.0f}.GenerateDense(
+    RandomDataGenerator{static_cast<bst_idx_t>(dmat->Info().num_row_), this->n_targets_, 0.0f}.GenerateDense(
         dmat->Info().labels.Data());
     CHECK_EQ(dmat->Info().labels.Size(), this->rows_ * this->n_targets_);
     dmat->Info().labels.Reshape(this->rows_, this->n_targets_);
@@ -488,7 +488,7 @@ int CudaArrayIterForTest::Next() {
 }
 #endif  // !defined(XGBOOST_USE_CUDA)
 
-NumpyArrayIterForTest::NumpyArrayIterForTest(float sparsity, size_t rows, size_t cols,
+NumpyArrayIterForTest::NumpyArrayIterForTest(float sparsity, bst_idx_t rows, size_t cols,
                                              size_t batches)
     : ArrayIterForTest{sparsity, rows, cols, batches} {
   rng_->Device(DeviceOrd::CPU());
@@ -515,7 +515,7 @@ std::shared_ptr<DMatrix> GetDMatrixFromData(const std::vector<float>& x, std::si
   return p_fmat;
 }
 
-std::unique_ptr<DMatrix> CreateSparsePageDMatrix(bst_row_t n_samples, bst_feature_t n_features,
+std::unique_ptr<DMatrix> CreateSparsePageDMatrix(bst_idx_t n_samples, bst_feature_t n_features,
                                                  size_t n_batches, std::string prefix) {
   CHECK_GE(n_samples, n_batches);
   NumpyArrayIterForTest iter(0, n_samples, n_features, n_batches);
diff --git a/tests/cpp/helpers.h b/tests/cpp/helpers.h
index 56b9d773950d..3f93321f0fb5 100644
--- a/tests/cpp/helpers.h
+++ b/tests/cpp/helpers.h
@@ -223,7 +223,7 @@ Json GetArrayInterface(HostDeviceVector<T> const* storage, size_t rows, size_t c
 
 // Generate in-memory random data without using DMatrix.
 class RandomDataGenerator {
-  bst_row_t rows_;
+  bst_idx_t rows_;
   size_t cols_;
   float sparsity_;
 
@@ -246,7 +246,7 @@ class RandomDataGenerator {
   void GenerateLabels(std::shared_ptr<DMatrix> p_fmat) const;
 
  public:
-  RandomDataGenerator(bst_row_t rows, size_t cols, float sparsity)
+  RandomDataGenerator(bst_idx_t rows, size_t cols, float sparsity)
       : rows_{rows}, cols_{cols}, sparsity_{sparsity}, lcg_{seed_} {}
 
   RandomDataGenerator& Lower(float v) {
@@ -308,7 +308,7 @@ class RandomDataGenerator {
 
   std::string GenerateColumnarArrayInterface(std::vector<HostDeviceVector<float>>* data) const;
 
-  void GenerateCSR(HostDeviceVector<float>* value, HostDeviceVector<bst_row_t>* row_ptr,
+  void GenerateCSR(HostDeviceVector<float>* value, HostDeviceVector<std::size_t>* row_ptr,
                    HostDeviceVector<bst_feature_t>* columns) const;
 
   [[nodiscard]] std::shared_ptr<DMatrix> GenerateDMatrix(
@@ -354,7 +354,7 @@ std::shared_ptr<DMatrix> GetDMatrixFromData(const std::vector<float>& x, std::si
  *
  * \return A Sparse DMatrix with n_batches.
  */
-std::unique_ptr<DMatrix> CreateSparsePageDMatrix(bst_row_t n_samples, bst_feature_t n_features,
+std::unique_ptr<DMatrix> CreateSparsePageDMatrix(bst_idx_t n_samples, bst_feature_t n_features,
                                                  size_t n_batches, std::string prefix = "cache");
 
 /**
@@ -413,12 +413,12 @@ inline HostDeviceVector<GradientPair> GenerateRandomGradients(const size_t n_row
   return gpair;
 }
 
-inline linalg::Matrix<GradientPair> GenerateRandomGradients(Context const* ctx, bst_row_t n_rows,
+inline linalg::Matrix<GradientPair> GenerateRandomGradients(Context const* ctx, bst_idx_t n_rows,
                                                             bst_target_t n_targets,
                                                             float lower = 0.0f,
                                                             float upper = 1.0f) {
   auto g = GenerateRandomGradients(n_rows * n_targets, lower, upper);
-  linalg::Matrix<GradientPair> gpair({n_rows, static_cast<bst_row_t>(n_targets)}, ctx->Device());
+  linalg::Matrix<GradientPair> gpair({n_rows, static_cast<bst_idx_t>(n_targets)}, ctx->Device());
   gpair.Data()->Copy(g);
   return gpair;
 }
@@ -439,7 +439,7 @@ class ArrayIterForTest {
   size_t n_batches_;
 
  public:
-  size_t static constexpr Rows() { return 1024; }
+  bst_idx_t static constexpr Rows() { return 1024; }
   size_t static constexpr Batches() { return 100; }
   size_t static constexpr Cols() { return 13; }
 
@@ -470,7 +470,7 @@ class CudaArrayIterForTest : public ArrayIterForTest {
 
 class NumpyArrayIterForTest : public ArrayIterForTest {
  public:
-  explicit NumpyArrayIterForTest(float sparsity, size_t rows = Rows(), size_t cols = Cols(),
+  explicit NumpyArrayIterForTest(float sparsity, bst_idx_t rows = Rows(), size_t cols = Cols(),
                                  size_t batches = Batches());
   explicit NumpyArrayIterForTest(Context const* ctx, HostDeviceVector<float> const& data,
                                  std::size_t n_samples, bst_feature_t n_features,
diff --git a/tests/cpp/histogram_helpers.h b/tests/cpp/histogram_helpers.h
index 496aa30f3475..8f345484d06b 100644
--- a/tests/cpp/histogram_helpers.h
+++ b/tests/cpp/histogram_helpers.h
@@ -47,7 +47,7 @@ inline std::unique_ptr<EllpackPageImpl> BuildEllpackPage(int n_rows, int n_cols,
           0.26f, 0.71f, 1.83f});
   cmat.SetMins({0.1f, 0.2f, 0.3f, 0.1f, 0.2f, 0.3f, 0.2f, 0.2f});
 
-  bst_row_t row_stride = 0;
+  bst_idx_t row_stride = 0;
   const auto &offset_vec = batch.offset.ConstHostVector();
   for (size_t i = 1; i < offset_vec.size(); ++i) {
     row_stride = std::max(row_stride, offset_vec[i] - offset_vec[i-1]);
diff --git a/tests/cpp/predictor/test_cpu_predictor.cc b/tests/cpp/predictor/test_cpu_predictor.cc
index 669827ee4e92..46b0859162e2 100644
--- a/tests/cpp/predictor/test_cpu_predictor.cc
+++ b/tests/cpp/predictor/test_cpu_predictor.cc
@@ -65,7 +65,7 @@ TEST(CpuPredictor, ExternalMemory) {
 }
 
 TEST(CpuPredictor, InplacePredict) {
-  bst_row_t constexpr kRows{128};
+  bst_idx_t constexpr kRows{128};
   bst_feature_t constexpr kCols{64};
   Context ctx;
   auto gen = RandomDataGenerator{kRows, kCols, 0.5}.Device(ctx.Device());
@@ -83,7 +83,7 @@ TEST(CpuPredictor, InplacePredict) {
 
   {
     HostDeviceVector<float> data;
-    HostDeviceVector<bst_row_t> rptrs;
+    HostDeviceVector<std::size_t> rptrs;
     HostDeviceVector<bst_feature_t> columns;
     gen.GenerateCSR(&data, &rptrs, &columns);
     auto data_interface = GetArrayInterface(&data, kRows * kCols, 1);
diff --git a/tests/cpp/predictor/test_predictor.cc b/tests/cpp/predictor/test_predictor.cc
index 0d715760853b..4108d74b8db9 100644
--- a/tests/cpp/predictor/test_predictor.cc
+++ b/tests/cpp/predictor/test_predictor.cc
@@ -186,7 +186,7 @@ void TestTrainingPrediction(Context const *ctx, size_t rows, size_t bins,
   }
 }
 
-void TestInplacePrediction(Context const *ctx, std::shared_ptr<DMatrix> x, bst_row_t rows,
+void TestInplacePrediction(Context const *ctx, std::shared_ptr<DMatrix> x, bst_idx_t rows,
                            bst_feature_t cols) {
   std::size_t constexpr kClasses { 4 };
   auto gen = RandomDataGenerator{rows, cols, 0.5}.Device(ctx->Device());
@@ -255,7 +255,7 @@ std::unique_ptr<Learner> LearnerForTest(Context const *ctx, std::shared_ptr<DMat
   return learner;
 }
 
-void VerifyPredictionWithLesserFeatures(Learner *learner, bst_row_t kRows,
+void VerifyPredictionWithLesserFeatures(Learner *learner, bst_idx_t kRows,
                                         std::shared_ptr<DMatrix> m_test,
                                         std::shared_ptr<DMatrix> m_invalid) {
   HostDeviceVector<float> prediction;
diff --git a/tests/cpp/predictor/test_predictor.h b/tests/cpp/predictor/test_predictor.h
index a65b60579e61..1ccd35102b2d 100644
--- a/tests/cpp/predictor/test_predictor.h
+++ b/tests/cpp/predictor/test_predictor.h
@@ -92,7 +92,7 @@ void TestTrainingPrediction(Context const* ctx, size_t rows, size_t bins,
                             std::shared_ptr<DMatrix> p_full, std::shared_ptr<DMatrix> p_hist,
                             bool check_contribs = false);
 
-void TestInplacePrediction(Context const* ctx, std::shared_ptr<DMatrix> x, bst_row_t rows,
+void TestInplacePrediction(Context const* ctx, std::shared_ptr<DMatrix> x, bst_idx_t rows,
                            bst_feature_t cols);
 
 void TestPredictionWithLesserFeatures(Context const* ctx);
diff --git a/tests/cpp/test_helpers.cc b/tests/cpp/test_helpers.cc
index 79d8d2475181..f582ba564b61 100644
--- a/tests/cpp/test_helpers.cc
+++ b/tests/cpp/test_helpers.cc
@@ -11,7 +11,7 @@ TEST(RandomDataGenerator, DMatrix) {
   auto p_dmatrix = RandomDataGenerator{kRows, kCols, kSparsity}.GenerateDMatrix();
 
   HostDeviceVector<float> csr_value;
-  HostDeviceVector<bst_row_t> csr_rptr;
+  HostDeviceVector<std::size_t> csr_rptr;
   HostDeviceVector<bst_feature_t> csr_cidx;
   RandomDataGenerator{kRows, kCols, kSparsity}.GenerateCSR(&csr_value, &csr_rptr, &csr_cidx);
 
diff --git a/tests/cpp/test_learner.cc b/tests/cpp/test_learner.cc
index 2429e09eb6bf..6fe65b97e2f0 100644
--- a/tests/cpp/test_learner.cc
+++ b/tests/cpp/test_learner.cc
@@ -217,7 +217,7 @@ TEST(Learner, JsonModelIO) {
 }
 
 TEST(Learner, ConfigIO) {
-  bst_row_t n_samples = 128;
+  bst_idx_t n_samples = 128;
   bst_feature_t n_features = 12;
   std::shared_ptr<DMatrix> p_fmat{
       RandomDataGenerator{n_samples, n_features, 0}.GenerateDMatrix(true, false, 2)};
diff --git a/tests/cpp/tree/hist/test_histogram.cc b/tests/cpp/tree/hist/test_histogram.cc
index 25a800367c49..5b48f2793b43 100644
--- a/tests/cpp/tree/hist/test_histogram.cc
+++ b/tests/cpp/tree/hist/test_histogram.cc
@@ -409,9 +409,9 @@ void TestHistogramExternalMemory(Context const *ctx, BatchParam batch_param, boo
     batch_param.hess = hess;
   }
 
-  std::vector<std::size_t> partition_size(1, 0);
+  std::vector<bst_idx_t> partition_size(1, 0);
   bst_bin_t total_bins{0};
-  bst_row_t n_samples{0};
+  bst_idx_t n_samples{0};
 
   auto gpair = GenerateRandomGradients(m->Info().num_row_, 0.0, 1.0);
   auto const &h_gpair = gpair.HostVector();
diff --git a/tests/cpp/tree/test_gpu_hist.cu b/tests/cpp/tree/test_gpu_hist.cu
index 6f937351ea23..aaeba13f13c5 100644
--- a/tests/cpp/tree/test_gpu_hist.cu
+++ b/tests/cpp/tree/test_gpu_hist.cu
@@ -440,7 +440,7 @@ RegTree GetHistTree(Context const* ctx, DMatrix* dmat) {
   return tree;
 }
 
-void VerifyHistColumnSplit(bst_row_t rows, bst_feature_t cols, RegTree const& expected_tree) {
+void VerifyHistColumnSplit(bst_idx_t rows, bst_feature_t cols, RegTree const& expected_tree) {
   Context ctx(MakeCUDACtx(GPUIDX));
 
   auto Xy = RandomDataGenerator{rows, cols, 0}.GenerateDMatrix(true);
@@ -490,7 +490,7 @@ RegTree GetApproxTree(Context const* ctx, DMatrix* dmat) {
   return tree;
 }
 
-void VerifyApproxColumnSplit(bst_row_t rows, bst_feature_t cols, RegTree const& expected_tree) {
+void VerifyApproxColumnSplit(bst_idx_t rows, bst_feature_t cols, RegTree const& expected_tree) {
   Context ctx(MakeCUDACtx(GPUIDX));
 
   auto Xy = RandomDataGenerator{rows, cols, 0}.GenerateDMatrix(true);
diff --git a/tests/cpp/tree/test_quantile_hist.cc b/tests/cpp/tree/test_quantile_hist.cc
index 4021c9959440..1c365100574b 100644
--- a/tests/cpp/tree/test_quantile_hist.cc
+++ b/tests/cpp/tree/test_quantile_hist.cc
@@ -201,7 +201,7 @@ TEST(QuantileHist, PartitionerColSplit) { TestColumnSplitPartitioner<CPUExpandEn
 TEST(QuantileHist, MultiPartitionerColSplit) { TestColumnSplitPartitioner<MultiExpandEntry>(3); }
 
 namespace {
-void VerifyColumnSplit(Context const* ctx, bst_row_t rows, bst_feature_t cols, bst_target_t n_targets,
+void VerifyColumnSplit(Context const* ctx, bst_idx_t rows, bst_feature_t cols, bst_target_t n_targets,
                        RegTree const& expected_tree) {
   auto Xy = RandomDataGenerator{rows, cols, 0}.GenerateDMatrix(true);
   linalg::Matrix<GradientPair> gpair = GenerateRandomGradients(ctx, rows, n_targets);
diff --git a/tests/cpp/tree/test_refresh.cc b/tests/cpp/tree/test_refresh.cc
index c8859c898519..bbd274a08d0f 100644
--- a/tests/cpp/tree/test_refresh.cc
+++ b/tests/cpp/tree/test_refresh.cc
@@ -15,7 +15,7 @@
 
 namespace xgboost::tree {
 TEST(Updater, Refresh) {
-  bst_row_t constexpr kRows = 8;
+  bst_idx_t constexpr kRows = 8;
   bst_feature_t constexpr kCols = 16;
   Context ctx;
 

From f75123529e5abb1b07dab42e0c1afcff8418cc32 Mon Sep 17 00:00:00 2001
From: Jiaming Yuan <jm.yuan@outlook.com>
Date: Wed, 13 Mar 2024 18:26:46 +0800
Subject: [PATCH 3/5] Declare deleted `operator=` overrides in JSON value types; use `bst_idx_t` for row counts.

---
 include/xgboost/json.h         | 10 ++++++++--
 src/predictor/cpu_predictor.cc |  2 +-
 tests/cpp/helpers.cc           |  2 +-
 tests/cpp/helpers.h            |  4 ++--
 4 files changed, 12 insertions(+), 6 deletions(-)

diff --git a/include/xgboost/json.h b/include/xgboost/json.h
index 77ca6a510c96..1416b8899785 100644
--- a/include/xgboost/json.h
+++ b/include/xgboost/json.h
@@ -60,9 +60,7 @@ class Value {
   virtual Json& operator[](int ind);
 
   virtual bool operator==(Value const& rhs) const = 0;
-#if !defined(__APPLE__)
   virtual Value& operator=(Value const& rhs) = delete;
-#endif  // !defined(__APPLE__)
 
   std::string TypeStr() const;
 
@@ -105,6 +103,7 @@ class JsonString : public Value {
   std::string&       GetString()       & { return str_; }
 
   bool operator==(Value const& rhs) const override;
+  Value& operator=(Value const& rhs) override = delete;
 
   static bool IsClassOf(Value const* value) {
     return value->Type() == ValueKind::kString;
@@ -134,6 +133,7 @@ class JsonArray : public Value {
   std::vector<Json>&       GetArray()       & { return vec_; }
 
   bool operator==(Value const& rhs) const override;
+  Value& operator=(Value const& rhs) override = delete;
 
   static bool IsClassOf(Value const* value) {
     return value->Type() == ValueKind::kArray;
@@ -158,6 +158,7 @@ class JsonTypedArray : public Value {
   JsonTypedArray(JsonTypedArray&& that) noexcept : Value{kind}, vec_{std::move(that.vec_)} {}
 
   bool operator==(Value const& rhs) const override;
+  Value& operator=(Value const& rhs) override = delete;
 
   void Set(size_t i, T v) { vec_[i] = v; }
   size_t Size() const { return vec_.size(); }
@@ -216,6 +217,7 @@ class JsonObject : public Value {
   Map& GetObject() & { return object_; }
 
   bool operator==(Value const& rhs) const override;
+  Value& operator=(Value const& rhs) override = delete;
 
   static bool IsClassOf(Value const* value) { return value->Type() == ValueKind::kObject; }
   ~JsonObject() override = default;
@@ -249,6 +251,7 @@ class JsonNumber : public Value {
   Float&       GetNumber()       & { return number_; }
 
   bool operator==(Value const& rhs) const override;
+  Value& operator=(Value const& rhs) override = delete;
 
   static bool IsClassOf(Value const* value) {
     return value->Type() == ValueKind::kNumber;
@@ -287,6 +290,7 @@ class JsonInteger : public Value {
       : Value{ValueKind::kInteger}, integer_{that.integer_} {}
 
   bool operator==(Value const& rhs) const override;
+  Value& operator=(Value const& rhs) override = delete;
 
   Int const& GetInteger() &&      { return integer_; }
   Int const& GetInteger() const & { return integer_; }
@@ -307,6 +311,7 @@ class JsonNull : public Value {
   void Save(JsonWriter* writer) const override;
 
   bool operator==(Value const& rhs) const override;
+  Value& operator=(Value const& rhs) override = delete;
 
   static bool IsClassOf(Value const* value) {
     return value->Type() == ValueKind::kNull;
@@ -336,6 +341,7 @@ class JsonBoolean : public Value {
   bool&       GetBoolean()       & { return boolean_; }
 
   bool operator==(Value const& rhs) const override;
+  Value& operator=(Value const& rhs) override = delete;
 
   static bool IsClassOf(Value const* value) {
     return value->Type() == ValueKind::kBoolean;
diff --git a/src/predictor/cpu_predictor.cc b/src/predictor/cpu_predictor.cc
index 8dbd69a9b4bb..f253493fc974 100644
--- a/src/predictor/cpu_predictor.cc
+++ b/src/predictor/cpu_predictor.cc
@@ -214,7 +214,7 @@ struct GHistIndexMatrixView {
   std::vector<float> const& values_;
 
  public:
-  size_t base_rowid;
+  bst_idx_t base_rowid;
 
  public:
   GHistIndexMatrixView(GHistIndexMatrix const &_page, uint64_t n_feat,
diff --git a/tests/cpp/helpers.cc b/tests/cpp/helpers.cc
index dc99f76aaad8..6a89207e021f 100644
--- a/tests/cpp/helpers.cc
+++ b/tests/cpp/helpers.cc
@@ -662,7 +662,7 @@ std::unique_ptr<GradientBooster> CreateTrainedGBM(std::string name, Args kwargs,
   return gbm;
 }
 
-ArrayIterForTest::ArrayIterForTest(float sparsity, size_t rows, size_t cols, size_t batches)
+ArrayIterForTest::ArrayIterForTest(float sparsity, bst_idx_t rows, size_t cols, size_t batches)
     : rows_{rows}, cols_{cols}, n_batches_{batches} {
   XGProxyDMatrixCreate(&proxy_);
   rng_ = std::make_unique<RandomDataGenerator>(rows_, cols_, sparsity);
diff --git a/tests/cpp/helpers.h b/tests/cpp/helpers.h
index 3f93321f0fb5..c161856bbbe4 100644
--- a/tests/cpp/helpers.h
+++ b/tests/cpp/helpers.h
@@ -434,7 +434,7 @@ class ArrayIterForTest {
 
   std::vector<std::string> batches_;
   std::string interface_;
-  size_t rows_;
+  bst_idx_t rows_;
   size_t cols_;
   size_t n_batches_;
 
@@ -451,7 +451,7 @@ class ArrayIterForTest {
   [[nodiscard]] std::size_t Iter() const { return iter_; }
   auto Proxy() -> decltype(proxy_) { return proxy_; }
 
-  explicit ArrayIterForTest(float sparsity, size_t rows, size_t cols, size_t batches);
+  explicit ArrayIterForTest(float sparsity, bst_idx_t rows, size_t cols, size_t batches);
   /**
    * \brief Create iterator with user provided data.
    */

From 3039f96b6d99e3e21c0192f9fb97b15441a1f870 Mon Sep 17 00:00:00 2001
From: Jiaming Yuan <jm.yuan@outlook.com>
Date: Wed, 13 Mar 2024 18:29:18 +0800
Subject: [PATCH 4/5] Use `bst_idx_t` for row offsets in the SYCL gradient index.

---
 plugin/sycl/data/gradient_index.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/plugin/sycl/data/gradient_index.cc b/plugin/sycl/data/gradient_index.cc
index 49b66a71052f..e193b66894c9 100644
--- a/plugin/sycl/data/gradient_index.cc
+++ b/plugin/sycl/data/gradient_index.cc
@@ -57,7 +57,7 @@ void GHistIndexMatrix::SetIndexData(::sycl::queue qu,
                                     uint32_t* offsets) {
   if (nbins == 0) return;
   const xgboost::Entry *data_ptr = dmat.data.DataConst();
-  const bst_row_t *offset_vec = dmat.row_ptr.DataConst();
+  const bst_idx_t *offset_vec = dmat.row_ptr.DataConst();
   const size_t num_rows = dmat.row_ptr.Size() - 1;
   const bst_float* cut_values = cut_device.Values().DataConst();
   const uint32_t* cut_ptrs = cut_device.Ptrs().DataConst();

From 6c0e235fdafd267a493eb548722921353e8aba6a Mon Sep 17 00:00:00 2001
From: Jiaming Yuan <jm.yuan@outlook.com>
Date: Wed, 13 Mar 2024 19:06:44 +0800
Subject: [PATCH 5/5] Use `bst_idx_t` in CUDA sources and tests; tidy includes.

---
 src/collective/device_communicator_adapter.cuh  |  2 ++
 src/common/device_helpers.cuh                   |  3 ---
 src/common/hist_util.cu                         |  4 +---
 src/common/hist_util.cuh                        |  2 +-
 src/common/host_device_vector.cc                |  2 +-
 src/common/host_device_vector.cu                |  2 +-
 src/common/quantile.cu                          | 10 +++++-----
 src/data/data.cc                                |  2 +-
 src/data/device_adapter.cuh                     |  6 +++---
 src/data/simple_dmatrix.cuh                     |  2 +-
 src/predictor/gpu_predictor.cu                  | 14 +++++++-------
 src/predictor/predictor.cc                      |  2 +-
 tests/cpp/common/test_hist_util.cu              |  8 ++++----
 tests/cpp/common/test_quantile.cu               | 14 +++++++-------
 tests/cpp/common/test_span.cu                   | 11 ++++++-----
 tests/cpp/data/test_device_adapter.cu           |  2 +-
 tests/cpp/plugin/test_sycl_predictor.cc         |  4 ++--
 tests/cpp/tree/gpu_hist/test_evaluate_splits.cu |  8 ++++----
 18 files changed, 48 insertions(+), 50 deletions(-)

diff --git a/src/collective/device_communicator_adapter.cuh b/src/collective/device_communicator_adapter.cuh
index 7d3e836a0ec9..647c74b4e856 100644
--- a/src/collective/device_communicator_adapter.cuh
+++ b/src/collective/device_communicator_adapter.cuh
@@ -3,6 +3,8 @@
  */
 #pragma once
 
+#include <numeric>  // for accumulate
+
 #include "communicator.h"
 #include "device_communicator.cuh"
 
diff --git a/src/common/device_helpers.cuh b/src/common/device_helpers.cuh
index 46f76c41589d..026fbacf24be 100644
--- a/src/common/device_helpers.cuh
+++ b/src/common/device_helpers.cuh
@@ -19,11 +19,9 @@
 #include <thrust/unique.h>
 
 #include <algorithm>
-#include <chrono>
 #include <cstddef>  // for size_t
 #include <cub/cub.cuh>
 #include <cub/util_allocator.cuh>
-#include <numeric>
 #include <sstream>
 #include <string>
 #include <tuple>
@@ -31,7 +29,6 @@
 
 #include "../collective/communicator-inl.h"
 #include "common.h"
-#include "xgboost/global_config.h"
 #include "xgboost/host_device_vector.h"
 #include "xgboost/logging.h"
 #include "xgboost/span.h"
diff --git a/src/common/hist_util.cu b/src/common/hist_util.cu
index a2fda7017b8b..39f310ebb66a 100644
--- a/src/common/hist_util.cu
+++ b/src/common/hist_util.cu
@@ -13,8 +13,6 @@
 #include <xgboost/logging.h>
 
 #include <cstddef>  // for size_t
-#include <memory>
-#include <mutex>
 #include <utility>
 #include <vector>
 
@@ -123,7 +121,7 @@ void SortByWeight(dh::device_vector<float>* weights, dh::device_vector<Entry>* s
       [=] __device__(const Entry& a, const Entry& b) { return a.index == b.index; });
 }
 
-void RemoveDuplicatedCategories(DeviceOrd device, MetaInfo const& info, Span<bst_row_t> d_cuts_ptr,
+void RemoveDuplicatedCategories(DeviceOrd device, MetaInfo const& info, Span<bst_idx_t> d_cuts_ptr,
                                 dh::device_vector<Entry>* p_sorted_entries,
                                 dh::device_vector<float>* p_sorted_weights,
                                 dh::caching_device_vector<size_t>* p_column_sizes_scan) {
diff --git a/src/common/hist_util.cuh b/src/common/hist_util.cuh
index e899f2a1ce07..fe3771924043 100644
--- a/src/common/hist_util.cuh
+++ b/src/common/hist_util.cuh
@@ -240,7 +240,7 @@ void MakeEntriesFromAdapter(AdapterBatch const& batch, BatchIter batch_iter, Ran
 void SortByWeight(dh::device_vector<float>* weights,
                   dh::device_vector<Entry>* sorted_entries);
 
-void RemoveDuplicatedCategories(DeviceOrd device, MetaInfo const& info, Span<bst_row_t> d_cuts_ptr,
+void RemoveDuplicatedCategories(DeviceOrd device, MetaInfo const& info, Span<bst_idx_t> d_cuts_ptr,
                                 dh::device_vector<Entry>* p_sorted_entries,
                                 dh::device_vector<float>* p_sorted_weights,
                                 dh::caching_device_vector<size_t>* p_column_sizes_scan);
diff --git a/src/common/host_device_vector.cc b/src/common/host_device_vector.cc
index a7a996c6c1ff..f4973c0428f0 100644
--- a/src/common/host_device_vector.cc
+++ b/src/common/host_device_vector.cc
@@ -178,7 +178,7 @@ template class HostDeviceVector<uint8_t>;
 template class HostDeviceVector<int8_t>;
 template class HostDeviceVector<FeatureType>;
 template class HostDeviceVector<Entry>;
-template class HostDeviceVector<uint64_t>;  // bst_row_t
+template class HostDeviceVector<bst_idx_t>;
 template class HostDeviceVector<uint32_t>;  // bst_feature_t
 
 #if defined(__APPLE__) || defined(__EMSCRIPTEN__)
diff --git a/src/common/host_device_vector.cu b/src/common/host_device_vector.cu
index 4933a4b11344..267309288522 100644
--- a/src/common/host_device_vector.cu
+++ b/src/common/host_device_vector.cu
@@ -412,7 +412,7 @@ template class HostDeviceVector<uint8_t>;
 template class HostDeviceVector<int8_t>;
 template class HostDeviceVector<FeatureType>;
 template class HostDeviceVector<Entry>;
-template class HostDeviceVector<uint64_t>;  // bst_row_t
+template class HostDeviceVector<bst_idx_t>;
 template class HostDeviceVector<uint32_t>;  // bst_feature_t
 template class HostDeviceVector<RegTree::Node>;
 template class HostDeviceVector<RegTree::CategoricalSplitMatrix::Segment>;
diff --git a/src/common/quantile.cu b/src/common/quantile.cu
index b3dd770ab6a0..0a7d0b0785e8 100644
--- a/src/common/quantile.cu
+++ b/src/common/quantile.cu
@@ -114,9 +114,9 @@ void CopyTo(Span<T> out, Span<U> src) {
 
 // Compute the merge path.
 common::Span<thrust::tuple<uint64_t, uint64_t>> MergePath(
-    Span<SketchEntry const> const &d_x, Span<bst_row_t const> const &x_ptr,
-    Span<SketchEntry const> const &d_y, Span<bst_row_t const> const &y_ptr,
-    Span<SketchEntry> out, Span<bst_row_t> out_ptr) {
+    Span<SketchEntry const> const &d_x, Span<bst_idx_t const> const &x_ptr,
+    Span<SketchEntry const> const &d_y, Span<bst_idx_t const> const &y_ptr,
+    Span<SketchEntry> out, Span<bst_idx_t> out_ptr) {
   auto x_merge_key_it = thrust::make_zip_iterator(thrust::make_tuple(
       dh::MakeTransformIterator<bst_idx_t>(
           thrust::make_counting_iterator(0ul),
@@ -206,8 +206,8 @@ common::Span<thrust::tuple<uint64_t, uint64_t>> MergePath(
 // run it in 2 passes to obtain the merge path and then customize the standard merge
 // algorithm.
 void MergeImpl(DeviceOrd device, Span<SketchEntry const> const &d_x,
-               Span<bst_row_t const> const &x_ptr, Span<SketchEntry const> const &d_y,
-               Span<bst_row_t const> const &y_ptr, Span<SketchEntry> out, Span<bst_row_t> out_ptr) {
+               Span<bst_idx_t const> const &x_ptr, Span<SketchEntry const> const &d_y,
+               Span<bst_idx_t const> const &y_ptr, Span<SketchEntry> out, Span<bst_idx_t> out_ptr) {
   dh::safe_cuda(cudaSetDevice(device.ordinal));
   CHECK_EQ(d_x.size() + d_y.size(), out.size());
   CHECK_EQ(x_ptr.size(), out_ptr.size());
diff --git a/src/data/data.cc b/src/data/data.cc
index b6ecd4db74c2..8cdcde201c79 100644
--- a/src/data/data.cc
+++ b/src/data/data.cc
@@ -47,7 +47,7 @@
 #include "simple_dmatrix.h"                  // for SimpleDMatrix
 #include "sparse_page_writer.h"              // for SparsePageFormatReg
 #include "validation.h"                      // for LabelsCheck, WeightsCheck, ValidateQueryGroup
-#include "xgboost/base.h"                    // for bst_group_t, bst_row_t, bst_float, bst_ulong
+#include "xgboost/base.h"                    // for bst_group_t, bst_idx_t, bst_float, bst_ulong
 #include "xgboost/context.h"                 // for Context
 #include "xgboost/host_device_vector.h"      // for HostDeviceVector
 #include "xgboost/learner.h"                 // for HostDeviceVector
diff --git a/src/data/device_adapter.cuh b/src/data/device_adapter.cuh
index 2dbc2b1e08ab..bc012fd9b439 100644
--- a/src/data/device_adapter.cuh
+++ b/src/data/device_adapter.cuh
@@ -208,8 +208,8 @@ class CupyAdapter : public detail::SingleBatchDataIter<CupyAdapterBatch> {
 
 // Returns maximum row length
 template <typename AdapterBatchT>
-std::size_t GetRowCounts(const AdapterBatchT batch, common::Span<bst_row_t> offset, DeviceOrd device,
-                         float missing) {
+bst_idx_t GetRowCounts(const AdapterBatchT batch, common::Span<bst_idx_t> offset, DeviceOrd device,
+                       float missing) {
   dh::safe_cuda(cudaSetDevice(device.ordinal));
   IsValidFunctor is_valid(missing);
   dh::safe_cuda(cudaMemsetAsync(offset.data(), '\0', offset.size_bytes()));
@@ -248,7 +248,7 @@ std::size_t GetRowCounts(const AdapterBatchT batch, common::Span<bst_row_t> offs
   bst_idx_t row_stride =
       dh::Reduce(thrust::cuda::par(alloc), thrust::device_pointer_cast(offset.data()),
                  thrust::device_pointer_cast(offset.data()) + offset.size(),
-                 static_cast<bst_idx_t>(0), thrust::maximum<bst_row_t>());
+                 static_cast<bst_idx_t>(0), thrust::maximum<bst_idx_t>());
   return row_stride;
 }
 
diff --git a/src/data/simple_dmatrix.cuh b/src/data/simple_dmatrix.cuh
index 528bea8be80a..e3c241886007 100644
--- a/src/data/simple_dmatrix.cuh
+++ b/src/data/simple_dmatrix.cuh
@@ -40,7 +40,7 @@ void CopyDataToDMatrix(AdapterBatchT batch, common::Span<Entry> data,
 }
 
 template <typename AdapterBatchT>
-void CountRowOffsets(const AdapterBatchT& batch, common::Span<bst_row_t> offset, DeviceOrd device,
+void CountRowOffsets(const AdapterBatchT& batch, common::Span<bst_idx_t> offset, DeviceOrd device,
                      float missing) {
   dh::safe_cuda(cudaSetDevice(device.ordinal));
   IsValidFunctor is_valid(missing);
diff --git a/src/predictor/gpu_predictor.cu b/src/predictor/gpu_predictor.cu
index 29fbae870270..aea1aa95deb1 100644
--- a/src/predictor/gpu_predictor.cu
+++ b/src/predictor/gpu_predictor.cu
@@ -67,12 +67,12 @@ struct TreeView {
 
 struct SparsePageView {
   common::Span<const Entry> d_data;
-  common::Span<const bst_row_t> d_row_ptr;
+  common::Span<const bst_idx_t> d_row_ptr;
   bst_feature_t num_features;
 
   SparsePageView() = default;
   XGBOOST_DEVICE SparsePageView(common::Span<const Entry> data,
-                                common::Span<const bst_row_t> row_ptr,
+                                common::Span<const bst_idx_t> row_ptr,
                                 bst_feature_t num_features)
       : d_data{data}, d_row_ptr{row_ptr}, num_features(num_features) {}
   [[nodiscard]] __device__ float GetElement(size_t ridx, size_t fidx) const {
@@ -664,7 +664,7 @@ __global__ void MaskBitVectorKernel(
   }
 }
 
-__device__ bst_node_t GetLeafIndexByBitVector(bst_row_t ridx, TreeView const& tree,
+__device__ bst_node_t GetLeafIndexByBitVector(bst_idx_t ridx, TreeView const& tree,
                                               BitVector const& decision_bits,
                                               BitVector const& missing_bits, std::size_t num_nodes,
                                               std::size_t tree_offset) {
@@ -682,7 +682,7 @@ __device__ bst_node_t GetLeafIndexByBitVector(bst_row_t ridx, TreeView const& tr
   return nidx;
 }
 
-__device__ float GetLeafWeightByBitVector(bst_row_t ridx, TreeView const& tree,
+__device__ float GetLeafWeightByBitVector(bst_idx_t ridx, TreeView const& tree,
                                           BitVector const& decision_bits,
                                           BitVector const& missing_bits, std::size_t num_nodes,
                                           std::size_t tree_offset) {
@@ -1171,7 +1171,7 @@ class GPUPredictor : public xgboost::Predictor {
     auto max_shared_memory_bytes = ConfigureDevice(ctx_->Device());
 
     const MetaInfo& info = p_fmat->Info();
-    bst_row_t num_rows = info.num_row_;
+    bst_idx_t num_rows = info.num_row_;
     if (tree_end == 0 || tree_end > model.trees.size()) {
       tree_end = static_cast<uint32_t>(model.trees.size());
     }
@@ -1196,7 +1196,7 @@ class GPUPredictor : public xgboost::Predictor {
       for (auto const& batch : p_fmat->GetBatches<SparsePage>()) {
         batch.data.SetDevice(ctx_->Device());
         batch.offset.SetDevice(ctx_->Device());
-        bst_row_t batch_offset = 0;
+        bst_idx_t batch_offset = 0;
         SparsePageView data{batch.data.DeviceSpan(), batch.offset.DeviceSpan(),
                             model.learner_model_param->num_feature};
         size_t num_rows = batch.Size();
@@ -1219,7 +1219,7 @@ class GPUPredictor : public xgboost::Predictor {
       }
     } else {
       for (auto const& batch : p_fmat->GetBatches<EllpackPage>(ctx_, BatchParam{})) {
-        bst_row_t batch_offset = 0;
+        bst_idx_t batch_offset = 0;
         EllpackDeviceAccessor data{batch.Impl()->GetDeviceAccessor(ctx_->Device())};
         size_t num_rows = batch.Size();
         auto grid =
diff --git a/src/predictor/predictor.cc b/src/predictor/predictor.cc
index 1b2e9137cc3b..2a6d1b9c58db 100644
--- a/src/predictor/predictor.cc
+++ b/src/predictor/predictor.cc
@@ -9,7 +9,7 @@
 #include <string>                        // for string, to_string
 
 #include "../gbm/gbtree_model.h"         // for GBTreeModel
-#include "xgboost/base.h"                // for bst_float, Args, bst_group_t, bst_row_t
+#include "xgboost/base.h"                // for bst_float, Args, bst_group_t, bst_idx_t
 #include "xgboost/context.h"             // for Context
 #include "xgboost/data.h"                // for MetaInfo
 #include "xgboost/host_device_vector.h"  // for HostDeviceVector
diff --git a/tests/cpp/common/test_hist_util.cu b/tests/cpp/common/test_hist_util.cu
index 624c5b35d591..73af7115c385 100644
--- a/tests/cpp/common/test_hist_util.cu
+++ b/tests/cpp/common/test_hist_util.cu
@@ -214,7 +214,7 @@ TEST(HistUtil, RemoveDuplicatedCategories) {
   dh::device_vector<float> weight(n_samples * n_features, 0);
   dh::Iota(dh::ToSpan(weight), ctx.CUDACtx()->Stream());
 
-  dh::caching_device_vector<bst_row_t> columns_ptr(4);
+  dh::caching_device_vector<bst_idx_t> columns_ptr(4);
   for (std::size_t i = 0; i < columns_ptr.size(); ++i) {
     columns_ptr[i] = i * n_samples;
   }
@@ -795,7 +795,7 @@ TEST(HistUtil, AdapterSketchFromWeights) {
 
 namespace {
 class DeviceSketchWithHessianTest
-    : public ::testing::TestWithParam<std::tuple<bool, bst_row_t, bst_bin_t>> {
+    : public ::testing::TestWithParam<std::tuple<bool, bst_idx_t, bst_bin_t>> {
   bst_feature_t n_features_ = 5;
   bst_group_t n_groups_{3};
 
@@ -910,9 +910,9 @@ class DeviceSketchWithHessianTest
 };
 
 auto MakeParamsForTest() {
-  std::vector<bst_row_t> sizes = {1, 2, 256, 512, 1000, 1500};
+  std::vector<bst_idx_t> sizes = {1, 2, 256, 512, 1000, 1500};
   std::vector<bst_bin_t> bin_sizes = {2, 16, 256, 512};
-  std::vector<std::tuple<bool, bst_row_t, bst_bin_t>> configs;
+  std::vector<std::tuple<bool, bst_idx_t, bst_bin_t>> configs;
   for (auto n_samples : sizes) {
     for (auto n_bins : bin_sizes) {
       configs.emplace_back(true, n_samples, n_bins);
diff --git a/tests/cpp/common/test_quantile.cu b/tests/cpp/common/test_quantile.cu
index 26bd05524ded..070c705b55a6 100644
--- a/tests/cpp/common/test_quantile.cu
+++ b/tests/cpp/common/test_quantile.cu
@@ -27,7 +27,7 @@ TEST(GPUQuantile, Basic) {
   HostDeviceVector<FeatureType> ft;
   SketchContainer sketch(ft, kBins, kCols, kRows, FstCU());
   dh::caching_device_vector<Entry> entries;
-  dh::device_vector<bst_row_t> cuts_ptr(kCols+1);
+  dh::device_vector<bst_idx_t> cuts_ptr(kCols+1);
   thrust::fill(cuts_ptr.begin(), cuts_ptr.end(), 0);
   // Push empty
   sketch.Push(dh::ToSpan(entries), dh::ToSpan(cuts_ptr), dh::ToSpan(cuts_ptr), 0);
@@ -87,11 +87,11 @@ TEST(GPUQuantile, Unique) {
 
 // if with_error is true, the test tolerates floating point error
 void TestQuantileElemRank(DeviceOrd device, Span<SketchEntry const> in,
-                          Span<bst_row_t const> d_columns_ptr, bool with_error = false) {
+                          Span<bst_idx_t const> d_columns_ptr, bool with_error = false) {
   dh::safe_cuda(cudaSetDevice(device.ordinal));
   std::vector<SketchEntry> h_in(in.size());
   dh::CopyDeviceSpanToVector(&h_in, in);
-  std::vector<bst_row_t> h_columns_ptr(d_columns_ptr.size());
+  std::vector<bst_idx_t> h_columns_ptr(d_columns_ptr.size());
   dh::CopyDeviceSpanToVector(&h_columns_ptr, d_columns_ptr);
 
   for (size_t i = 1; i < d_columns_ptr.size(); ++i) {
@@ -164,7 +164,7 @@ TEST(GPUQuantile, MergeEmpty) {
 
   std::vector<SketchEntry> entries_before(sketch_0.Data().size());
   dh::CopyDeviceSpanToVector(&entries_before, sketch_0.Data());
-  std::vector<bst_row_t> ptrs_before(sketch_0.ColumnsPtr().size());
+  std::vector<bst_idx_t> ptrs_before(sketch_0.ColumnsPtr().size());
   dh::CopyDeviceSpanToVector(&ptrs_before, sketch_0.ColumnsPtr());
   thrust::device_vector<size_t> columns_ptr(kCols + 1);
   // Merge an empty sketch
@@ -172,7 +172,7 @@ TEST(GPUQuantile, MergeEmpty) {
 
   std::vector<SketchEntry> entries_after(sketch_0.Data().size());
   dh::CopyDeviceSpanToVector(&entries_after, sketch_0.Data());
-  std::vector<bst_row_t> ptrs_after(sketch_0.ColumnsPtr().size());
+  std::vector<bst_idx_t> ptrs_after(sketch_0.ColumnsPtr().size());
   dh::CopyDeviceSpanToVector(&ptrs_after, sketch_0.ColumnsPtr());
 
   CHECK_EQ(entries_before.size(), entries_after.size());
@@ -222,7 +222,7 @@ TEST(GPUQuantile, MergeBasic) {
     }
 
     auto columns_ptr = sketch_0.ColumnsPtr();
-    std::vector<bst_row_t> h_columns_ptr(columns_ptr.size());
+    std::vector<bst_idx_t> h_columns_ptr(columns_ptr.size());
     dh::CopyDeviceSpanToVector(&h_columns_ptr, columns_ptr);
     ASSERT_EQ(h_columns_ptr.back(), sketch_1.Data().size() + size_before_merge);
 
@@ -278,7 +278,7 @@ void TestMergeDuplicated(int32_t n_bins, size_t cols, size_t rows, float frac) {
   TestQuantileElemRank(FstCU(), sketch_0.Data(), sketch_0.ColumnsPtr());
 
   auto columns_ptr = sketch_0.ColumnsPtr();
-  std::vector<bst_row_t> h_columns_ptr(columns_ptr.size());
+  std::vector<bst_idx_t> h_columns_ptr(columns_ptr.size());
   dh::CopyDeviceSpanToVector(&h_columns_ptr, columns_ptr);
   ASSERT_EQ(h_columns_ptr.back(), sketch_1.Data().size() + size_before_merge);
 
diff --git a/tests/cpp/common/test_span.cu b/tests/cpp/common/test_span.cu
index 85c952340659..9c2bdc65cd34 100644
--- a/tests/cpp/common/test_span.cu
+++ b/tests/cpp/common/test_span.cu
@@ -1,14 +1,15 @@
-/*!
- * Copyright 2018 XGBoost contributors
+/**
+ * Copyright 2018-2024, XGBoost contributors
  */
 #include <gtest/gtest.h>
-
-#include <thrust/host_vector.h>
 #include <thrust/device_vector.h>
 #include <thrust/execution_policy.h>
+#include <thrust/host_vector.h>
+#include <xgboost/span.h>
+
+#include <numeric>  // for iota
 
 #include "../../../src/common/device_helpers.cuh"
-#include <xgboost/span.h>
 #include "test_span.h"
 
 namespace xgboost {
diff --git a/tests/cpp/data/test_device_adapter.cu b/tests/cpp/data/test_device_adapter.cu
index 2190dbe5bceb..61cc9463c228 100644
--- a/tests/cpp/data/test_device_adapter.cu
+++ b/tests/cpp/data/test_device_adapter.cu
@@ -62,7 +62,7 @@ TEST(DeviceAdapter, GetRowCounts) {
                        .Device(ctx.Device())
                        .GenerateArrayInterface(&storage);
     auto adapter = CupyAdapter{str_arr};
-    HostDeviceVector<bst_row_t> offset(adapter.NumRows() + 1, 0);
+    HostDeviceVector<bst_idx_t> offset(adapter.NumRows() + 1, 0);
     offset.SetDevice(ctx.Device());
     auto rstride = GetRowCounts(adapter.Value(), offset.DeviceSpan(), ctx.Device(),
                                 std::numeric_limits<float>::quiet_NaN());
diff --git a/tests/cpp/plugin/test_sycl_predictor.cc b/tests/cpp/plugin/test_sycl_predictor.cc
index d5b3a5e5cd9a..7bd788a3b071 100755
--- a/tests/cpp/plugin/test_sycl_predictor.cc
+++ b/tests/cpp/plugin/test_sycl_predictor.cc
@@ -43,7 +43,7 @@ TEST(SyclPredictor, ExternalMemory) {
 }
 
 TEST(SyclPredictor, InplacePredict) {
-  bst_row_t constexpr kRows{128};
+  bst_idx_t constexpr kRows{128};
   bst_feature_t constexpr kCols{64};
   Context ctx;
   auto gen = RandomDataGenerator{kRows, kCols, 0.5}.Device(ctx.Device());
@@ -106,4 +106,4 @@ TEST(SyclPredictor, Multi) {
   TestVectorLeafPrediction(&ctx);
 }
 
-}  // namespace xgboost
\ No newline at end of file
+}  // namespace xgboost
diff --git a/tests/cpp/tree/gpu_hist/test_evaluate_splits.cu b/tests/cpp/tree/gpu_hist/test_evaluate_splits.cu
index 862bc6bfcca9..f4accfc8a7b4 100644
--- a/tests/cpp/tree/gpu_hist/test_evaluate_splits.cu
+++ b/tests/cpp/tree/gpu_hist/test_evaluate_splits.cu
@@ -363,7 +363,7 @@ TEST(GpuHist, EvaluateSingleSplitMissing) {
   GPUTrainingParam param{tparam};
 
   thrust::device_vector<bst_feature_t> feature_set = std::vector<bst_feature_t>{0};
-  thrust::device_vector<uint32_t> feature_segments = std::vector<bst_row_t>{0, 2};
+  thrust::device_vector<uint32_t> feature_segments = std::vector<bst_idx_t>{0, 2};
   thrust::device_vector<float> feature_values = std::vector<float>{1.0, 2.0};
   thrust::device_vector<float> feature_min_values = std::vector<float>{0.0};
   auto feature_histogram = ConvertToInteger(&ctx, {{-0.5, 0.5}, {0.5, 0.5}});
@@ -412,7 +412,7 @@ TEST(GpuHist, EvaluateSingleSplitFeatureSampling) {
   GPUTrainingParam param{tparam};
 
   thrust::device_vector<bst_feature_t> feature_set = std::vector<bst_feature_t>{1};
-  thrust::device_vector<uint32_t> feature_segments = std::vector<bst_row_t>{0, 2, 4};
+  thrust::device_vector<uint32_t> feature_segments = std::vector<bst_idx_t>{0, 2, 4};
   thrust::device_vector<float> feature_values = std::vector<float>{1.0, 2.0, 11.0, 12.0};
   thrust::device_vector<float> feature_min_values = std::vector<float>{0.0, 10.0};
   auto feature_histogram =
@@ -446,7 +446,7 @@ TEST(GpuHist, EvaluateSingleSplitBreakTies) {
   GPUTrainingParam param{tparam};
 
   thrust::device_vector<bst_feature_t> feature_set = std::vector<bst_feature_t>{0, 1};
-  thrust::device_vector<uint32_t> feature_segments = std::vector<bst_row_t>{0, 2, 4};
+  thrust::device_vector<uint32_t> feature_segments = std::vector<bst_idx_t>{0, 2, 4};
   thrust::device_vector<float> feature_values = std::vector<float>{1.0, 2.0, 11.0, 12.0};
   thrust::device_vector<float> feature_min_values = std::vector<float>{0.0, 10.0};
   auto feature_histogram =
@@ -478,7 +478,7 @@ TEST(GpuHist, EvaluateSplits) {
   GPUTrainingParam param{tparam};
 
   thrust::device_vector<bst_feature_t> feature_set = std::vector<bst_feature_t>{0, 1};
-  thrust::device_vector<uint32_t> feature_segments = std::vector<bst_row_t>{0, 2, 4};
+  thrust::device_vector<uint32_t> feature_segments = std::vector<bst_idx_t>{0, 2, 4};
   thrust::device_vector<float> feature_values = std::vector<float>{1.0, 2.0, 11.0, 12.0};
   thrust::device_vector<float> feature_min_values = std::vector<float>{0.0, 0.0};
   auto feature_histogram_left =