From b1d0ef737a0911b95a11fd3b04cc8e438ea69501 Mon Sep 17 00:00:00 2001
From: Bobby Wang
Date: Wed, 17 Jul 2024 14:51:38 +0800
Subject: [PATCH] GPU: Support ranker and regressor (#10560)

---
 .../dmlc/xgboost4j/java/CudfColumnBatch.java  |  16 +-
 .../xgboost4j/scala/QuantileDMatrix.scala     |  11 +-
 .../scala/spark/GpuXGBoostPlugin.scala        |  18 +-
 .../ml/dmlc/xgboost4j/java/BoosterTest.java   |  53 +-
 .../ml/dmlc/xgboost4j/java/DMatrixTest.java   |  99 ++-
 .../src/test/resources/binary.test.parquet    | Bin 5417 -> 0 bytes
 .../src/test/resources/binary.train.parquet   | Bin 5511 -> 0 bytes
 .../test/resources/multiclass.test.parquet    | Bin 5374 -> 0 bytes
 .../test/resources/multiclass.train.parquet   | Bin 5668 -> 0 bytes
 .../test/resources/regression.test.parquet    | Bin 6010 -> 0 bytes
 .../test/resources/regression.train.parquet   | Bin 6008 -> 0 bytes
 .../scala/QuantileDMatrixSuite.scala          |  50 +-
 .../scala/spark/GpuXGBoostPluginSuite.scala   | 376 ++++++++-
 .../xgboost4j/scala/spark/TrainTestData.scala |  86 ++
 .../xgboost4j/scala/spark/XXXXXSuite.scala    |  95 ---
 .../ml/dmlc/xgboost4j/scala/spark/Utils.scala |  12 +-
 .../dmlc/xgboost4j/scala/spark/PerTest.scala  |  11 +-
 .../scala/spark/XGBoostClassifierSuite.scala  |   7 +
 .../scala/spark/XGBoostEstimatorSuite.scala   | 732 +++++++++---------
 .../java/ml/dmlc/xgboost4j/java/DMatrix.java  | 265 ++++---
 .../ml/dmlc/xgboost4j/scala/DMatrix.scala     |  63 +-
 .../xgboost4j/src/native/xgboost4j-gpu.cu     |  21 +-
 22 files changed, 1191 insertions(+), 724 deletions(-)
 delete mode 100644 jvm-packages/xgboost4j-spark-gpu/src/test/resources/binary.test.parquet
 delete mode 100644 jvm-packages/xgboost4j-spark-gpu/src/test/resources/binary.train.parquet
 delete mode 100644 jvm-packages/xgboost4j-spark-gpu/src/test/resources/multiclass.test.parquet
 delete mode 100644 jvm-packages/xgboost4j-spark-gpu/src/test/resources/multiclass.train.parquet
 delete mode 100644 jvm-packages/xgboost4j-spark-gpu/src/test/resources/regression.test.parquet
 delete mode 100644 jvm-packages/xgboost4j-spark-gpu/src/test/resources/regression.train.parquet
 create mode 100644 jvm-packages/xgboost4j-spark-gpu/src/test/scala/ml/dmlc/xgboost4j/scala/spark/TrainTestData.scala
 delete mode 100644 jvm-packages/xgboost4j-spark-gpu/src/test/scala/ml/dmlc/xgboost4j/scala/spark/XXXXXSuite.scala

diff --git a/jvm-packages/xgboost4j-spark-gpu/src/main/java/ml/dmlc/xgboost4j/java/CudfColumnBatch.java b/jvm-packages/xgboost4j-spark-gpu/src/main/java/ml/dmlc/xgboost4j/java/CudfColumnBatch.java
index 90b394e5a1c5..2f1870c580be 100644
--- a/jvm-packages/xgboost4j-spark-gpu/src/main/java/ml/dmlc/xgboost4j/java/CudfColumnBatch.java
+++ b/jvm-packages/xgboost4j-spark-gpu/src/main/java/ml/dmlc/xgboost4j/java/CudfColumnBatch.java
@@ -39,18 +39,22 @@ public class CudfColumnBatch extends ColumnBatch {
   private final Table weightTable;
   @JsonIgnore
   private final Table baseMarginTable;
+  @JsonIgnore
+  private final Table qidTable;
 
   private List<CudfColumn> features;
   private List<CudfColumn> label;
   private List<CudfColumn> weight;
   private List<CudfColumn> baseMargin;
+  private List<CudfColumn> qid;
 
   public CudfColumnBatch(Table featureTable, Table labelTable, Table weightTable,
-                         Table baseMarginTable) {
+                         Table baseMarginTable, Table qidTable) {
     this.featureTable = featureTable;
     this.labelTable = labelTable;
     this.weightTable = weightTable;
     this.baseMarginTable = baseMarginTable;
+    this.qidTable = qidTable;
 
     features = initializeCudfColumns(featureTable);
     if (labelTable != null) {
@@ -66,6 +70,11 @@ public CudfColumnBatch(Table featureTable, Table labelTable, Table weightTable,
     if (baseMarginTable != null) {
       baseMargin =
        initializeCudfColumns(baseMarginTable);
     }
 
+    if (qidTable != null) {
+      qid = initializeCudfColumns(qidTable);
+    }
+
   }
 
   private List<CudfColumn> initializeCudfColumns(Table table) {
@@ -93,6 +102,10 @@ public List<CudfColumn> getBaseMargin() {
     return baseMargin;
   }
 
+  public List<CudfColumn> getQid() {
+    return qid;
+  }
+
   public String toJson() {
     ObjectMapper mapper = new ObjectMapper();
     mapper.setSerializationInclusion(JsonInclude.Include.NON_NULL);
@@ -119,5 +132,6 @@ public void close() {
     if (labelTable != null) labelTable.close();
     if (weightTable != null) weightTable.close();
     if (baseMarginTable != null) baseMarginTable.close();
+    if (qidTable != null) qidTable.close();
   }
 }
diff --git a/jvm-packages/xgboost4j-spark-gpu/src/main/scala/ml/dmlc/xgboost4j/scala/QuantileDMatrix.scala b/jvm-packages/xgboost4j-spark-gpu/src/main/scala/ml/dmlc/xgboost4j/scala/QuantileDMatrix.scala
index 93a773829f43..73abf6df9d68 100644
--- a/jvm-packages/xgboost4j-spark-gpu/src/main/scala/ml/dmlc/xgboost4j/scala/QuantileDMatrix.scala
+++ b/jvm-packages/xgboost4j-spark-gpu/src/main/scala/ml/dmlc/xgboost4j/scala/QuantileDMatrix.scala
@@ -24,9 +24,9 @@ class QuantileDMatrix private[scala](
   private[scala] override val jDMatrix: JQuantileDMatrix) extends DMatrix(jDMatrix) {
 
   /**
-   * Create QuantileDMatrix from iterator based on the cuda array interface
+   * Create QuantileDMatrix from iterator based on the array interface
    *
-   * @param iter the XGBoost ColumnBatch batch to provide the corresponding cuda array interface
+   * @param iter the XGBoost ColumnBatch batch to provide the corresponding array interface
    * @param missing the missing value
    * @param maxBin the max bin
    * @param nthread the parallelism
@@ -84,7 +84,7 @@ class QuantileDMatrix private[scala](
     throw new XGBoostError("QuantileDMatrix does not support setGroup.")
 
   /**
-   * Set label of DMatrix from cuda array interface
+   * Set label of DMatrix from array interface
    */
   @throws(classOf[XGBoostError])
   override def setLabel(column: Column): Unit =
@@ -104,4 +104,9 @@ class QuantileDMatrix private[scala](
   override def setBaseMargin(column: Column): Unit =
     throw new XGBoostError("QuantileDMatrix does not support setBaseMargin.")
 
+  @throws(classOf[XGBoostError])
+  override def setQueryId(column: Column): Unit = {
+    throw new XGBoostError("QuantileDMatrix does not support setQueryId.")
+  }
+
 }
diff --git a/jvm-packages/xgboost4j-spark-gpu/src/main/scala/ml/dmlc/xgboost4j/scala/spark/GpuXGBoostPlugin.scala b/jvm-packages/xgboost4j-spark-gpu/src/main/scala/ml/dmlc/xgboost4j/scala/spark/GpuXGBoostPlugin.scala
index 336f75eeffc8..4060697424c0 100644
--- a/jvm-packages/xgboost4j-spark-gpu/src/main/scala/ml/dmlc/xgboost4j/scala/spark/GpuXGBoostPlugin.scala
+++ b/jvm-packages/xgboost4j-spark-gpu/src/main/scala/ml/dmlc/xgboost4j/scala/spark/GpuXGBoostPlugin.scala
@@ -33,6 +33,7 @@ import org.apache.spark.sql.vectorized.ColumnarBatch
 
 import ml.dmlc.xgboost4j.java.CudfColumnBatch
 import ml.dmlc.xgboost4j.scala.{DMatrix, QuantileDMatrix}
+import ml.dmlc.xgboost4j.scala.spark.Utils.withResource
 import ml.dmlc.xgboost4j.scala.spark.params.HasGroupCol
 
 /**
@@ -119,7 +120,7 @@ class GpuXGBoostPlugin extends XGBoostPlugin {
     val nthread = estimator.getNthread
     val missing = estimator.getMissing
 
-    /** build QuantilDMatrix on the executor side */
+    /** build QuantileDMatrix on the executor side */
    def buildQuantileDMatrix(iter: Iterator[Table]): QuantileDMatrix = {
      val colBatchIter = iter.map { table =>
        withResource(new GpuColumnBatch(table)) { batch =>
          new CudfColumnBatch(
batch.select(indices.featureIds.get), batch.select(indices.labelId), batch.select(indices.weightId.getOrElse(-1)), - batch.select(indices.marginId.getOrElse(-1))); + batch.select(indices.marginId.getOrElse(-1)), + batch.select(indices.groupId.getOrElse(-1))); } } new QuantileDMatrix(colBatchIter, missing, maxBin, nthread) @@ -150,16 +152,6 @@ class GpuXGBoostPlugin extends XGBoostPlugin { ) } - /** Executes the provided code block and then closes the resource */ - def withResource[T <: AutoCloseable, V](r: T)(block: T => V): V = { - try { - block(r) - } finally { - r.close() - } - } - - override def transform[M <: XGBoostModel[M]](model: XGBoostModel[M], dataset: Dataset[_]): DataFrame = { val sc = dataset.sparkSession.sparkContext @@ -226,7 +218,7 @@ class GpuXGBoostPlugin extends XGBoostPlugin { throw new RuntimeException("Something wrong for feature indices") } try { - val cudfColumnBatch = new CudfColumnBatch(featureTable, null, null, null) + val cudfColumnBatch = new CudfColumnBatch(featureTable, null, null, null, null) val dm = new DMatrix(cudfColumnBatch, missing, nThread) if (dm == null) { Iterator.empty diff --git a/jvm-packages/xgboost4j-spark-gpu/src/test/java/ml/dmlc/xgboost4j/java/BoosterTest.java b/jvm-packages/xgboost4j-spark-gpu/src/test/java/ml/dmlc/xgboost4j/java/BoosterTest.java index 50d25765edb2..c1283c8b9076 100644 --- a/jvm-packages/xgboost4j-spark-gpu/src/test/java/ml/dmlc/xgboost4j/java/BoosterTest.java +++ b/jvm-packages/xgboost4j-spark-gpu/src/test/java/ml/dmlc/xgboost4j/java/BoosterTest.java @@ -22,8 +22,7 @@ import java.util.List; import java.util.Map; -import ai.rapids.cudf.ColumnVector; -import ai.rapids.cudf.Table; +import ai.rapids.cudf.*; import junit.framework.TestCase; import org.junit.Test; @@ -36,7 +35,27 @@ public class BoosterTest { @Test public void testBooster() throws XGBoostError { - String resourcePath = getClass().getResource("/binary.train.parquet").getFile(); + String trainingDataPath = "../../demo/data/veterans_lung_cancer.csv"; + Schema schema = Schema.builder() + .column(DType.FLOAT32, "A") + .column(DType.FLOAT32, "B") + .column(DType.FLOAT32, "C") + .column(DType.FLOAT32, "D") + + .column(DType.FLOAT32, "E") + .column(DType.FLOAT32, "F") + .column(DType.FLOAT32, "G") + .column(DType.FLOAT32, "H") + + .column(DType.FLOAT32, "I") + .column(DType.FLOAT32, "J") + .column(DType.FLOAT32, "K") + .column(DType.FLOAT32, "L") + + .column(DType.FLOAT32, "label") + .build(); + CSVOptions opts = CSVOptions.builder() + .hasHeader().build(); int maxBin = 16; int round = 10; @@ -53,32 +72,33 @@ public void testBooster() throws XGBoostError { } }; - try (Table table = Table.readParquet(new File(resourcePath))) { - ColumnVector[] features = new ColumnVector[6]; - for (int i = 0; i < 6; i++) { - features[i] = table.getColumn(i); + try (Table tmpTable = Table.readCSV(schema, opts, new File(trainingDataPath))) { + ColumnVector[] df = new ColumnVector[10]; + // exclude the first two columns, they are label bounds and contain inf. 
+      for (int i = 2; i < 12; ++i) {
+        df[i - 2] = tmpTable.getColumn(i);
       }
-
-      try (Table X = new Table(features)) {
+      try (Table X = new Table(df);) {
         ColumnVector[] labels = new ColumnVector[1];
-        labels[0] = table.getColumn(6);
+        labels[0] = tmpTable.getColumn(12);
 
-        try (Table y = new Table(labels)) {
+        try (Table y = new Table(labels);) {
 
-          CudfColumnBatch batch = new CudfColumnBatch(X, y, null, null);
-          CudfColumn labelColumn = CudfColumn.from(y.getColumn(0));
+          CudfColumnBatch batch = new CudfColumnBatch(X, y, null, null, null);
+          CudfColumn labelColumn = CudfColumn.from(tmpTable.getColumn(12));
 
-          // train XGBoost Booster base on DMatrix
+          // set watch list
           HashMap<String, DMatrix> watches = new HashMap<>();
+
           DMatrix dMatrix1 = new DMatrix(batch, Float.NaN, 1);
           dMatrix1.setLabel(labelColumn);
           watches.put("train", dMatrix1);
           Booster model1 = XGBoost.train(dMatrix1, paramMap, round, watches, null, null);
 
-          // train XGBoost Booster base on QuantileDMatrix
           List<ColumnBatch> tables = new LinkedList<>();
           tables.add(batch);
           DMatrix incrementalDMatrix = new QuantileDMatrix(tables.iterator(), Float.NaN, maxBin, 1);
+          // set watch list
           HashMap<String, DMatrix> watches1 = new HashMap<>();
           watches1.put("train", incrementalDMatrix);
           Booster model2 = XGBoost.train(incrementalDMatrix, paramMap, round, watches1, null, null);
@@ -86,11 +106,12 @@ public void testBooster() throws XGBoostError {
           float[][] predicat1 = model1.predict(dMatrix1);
           float[][] predicat2 = model2.predict(dMatrix1);
 
-          for (int i = 0; i < table.getRowCount(); i++) {
+          for (int i = 0; i < tmpTable.getRowCount(); i++) {
             TestCase.assertTrue(predicat1[i][0] - predicat2[i][0] < 1e-6);
           }
         }
       }
     }
   }
+
 }
diff --git a/jvm-packages/xgboost4j-spark-gpu/src/test/java/ml/dmlc/xgboost4j/java/DMatrixTest.java b/jvm-packages/xgboost4j-spark-gpu/src/test/java/ml/dmlc/xgboost4j/java/DMatrixTest.java
index 4293486a97b2..af94bf975cff 100644
--- a/jvm-packages/xgboost4j-spark-gpu/src/test/java/ml/dmlc/xgboost4j/java/DMatrixTest.java
+++ b/jvm-packages/xgboost4j-spark-gpu/src/test/java/ml/dmlc/xgboost4j/java/DMatrixTest.java
@@ -20,8 +20,6 @@
 import java.util.LinkedList;
 import java.util.List;
 
-import ai.rapids.cudf.ColumnVector;
-import ai.rapids.cudf.ColumnView;
 import ai.rapids.cudf.Table;
 import junit.framework.TestCase;
 import org.junit.Test;
@@ -36,24 +34,29 @@ public class DMatrixTest {
 
   @Test
   public void testCreateFromArrayInterfaceColumns() {
     Float[] labelFloats = new Float[]{2f, 4f, 6f, 8f, 10f};
+    Integer[] groups = new Integer[]{1, 1, 7, 7, 19, 26};
+    int[] expectedGroup = new int[]{0, 2, 4, 5, 6};
     Throwable ex = null;
     try (
       Table X = new Table.TestBuilder().column(1.f, null, 5.f, 7.f, 9.f).build();
       Table y = new Table.TestBuilder().column(labelFloats).build();
       Table w = new Table.TestBuilder().column(labelFloats).build();
+      Table q = new Table.TestBuilder().column(groups).build();
       Table margin = new Table.TestBuilder().column(labelFloats).build();) {
 
-      CudfColumnBatch cudfDataFrame = new CudfColumnBatch(X, y, w, null);
+      CudfColumnBatch cudfDataFrame = new CudfColumnBatch(X, y, w, null, null);
       CudfColumn labelColumn = CudfColumn.from(y.getColumn(0));
       CudfColumn weightColumn = CudfColumn.from(w.getColumn(0));
       CudfColumn baseMarginColumn = CudfColumn.from(margin.getColumn(0));
+      CudfColumn qidColumn = CudfColumn.from(q.getColumn(0));
 
       DMatrix dMatrix = new DMatrix(cudfDataFrame, 0, 1);
       dMatrix.setLabel(labelColumn);
       dMatrix.setWeight(weightColumn);
       dMatrix.setBaseMargin(baseMarginColumn);
+      dMatrix.setQueryId(qidColumn);
 
       String[] featureNames = new String[]{"f1"};
       dMatrix.setFeatureNames(featureNames);
@@ -69,10 +72,12 @@ public void testCreateFromArrayInterfaceColumns() {
       float[] label = dMatrix.getLabel();
       float[] weight = dMatrix.getWeight();
       float[] baseMargin = dMatrix.getBaseMargin();
+      int[] group = dMatrix.getGroup();
 
       TestCase.assertTrue(Arrays.equals(anchor, label));
       TestCase.assertTrue(Arrays.equals(anchor, weight));
       TestCase.assertTrue(Arrays.equals(anchor, baseMargin));
+      TestCase.assertTrue(Arrays.equals(expectedGroup, group));
     } catch (Throwable e) {
       ex = e;
       e.printStackTrace();
@@ -86,10 +91,14 @@ public void testCreateFromColumnDataIterator() throws XGBoostError {
     Float[] label1 = {25f, 21f, 22f, 20f, 24f};
     Float[] weight1 = {1.3f, 2.31f, 0.32f, 3.3f, 1.34f};
     Float[] baseMargin1 = {1.2f, 0.2f, 1.3f, 2.4f, 3.5f};
+    Integer[] groups1 = new Integer[]{1, 1, 7, 7, 19, 26};
 
     Float[] label2 = {9f, 5f, 4f, 10f, 12f};
     Float[] weight2 = {3.0f, 1.3f, 3.2f, 0.3f, 1.34f};
     Float[] baseMargin2 = {0.2f, 2.5f, 3.1f, 4.4f, 2.2f};
+    Integer[] groups2 = new Integer[]{30, 30, 30, 40, 40};
+
+    int[] expectedGroup = new int[]{0, 2, 4, 5, 6, 9, 11};
 
     try (
       Table X_0 = new Table.TestBuilder()
@@ -99,18 +108,21 @@ public void testCreateFromColumnDataIterator() throws XGBoostError {
       Table y_0 = new Table.TestBuilder().column(label1).build();
       Table w_0 = new Table.TestBuilder().column(weight1).build();
       Table m_0 = new Table.TestBuilder().column(baseMargin1).build();
+      Table q_0 = new Table.TestBuilder().column(groups1).build();
+
       Table X_1 = new Table.TestBuilder().column(11.2f, 11.2f, 15.2f, 17.2f, 19.2f)
           .column(1.2f, 1.4f, null, 12.6f, 10.10f).build();
       Table y_1 = new Table.TestBuilder().column(label2).build();
       Table w_1 = new Table.TestBuilder().column(weight2).build();
       Table m_1 = new Table.TestBuilder().column(baseMargin2).build();) {
+      Table q_1 = new Table.TestBuilder().column(groups2).build();
 
       List<ColumnBatch> tables = new LinkedList<>();
-      tables.add(new CudfColumnBatch(X_0, y_0, w_0, m_0));
-      tables.add(new CudfColumnBatch(X_1, y_1, w_1, m_1));
+      tables.add(new CudfColumnBatch(X_0, y_0, w_0, m_0, q_0));
+      tables.add(new CudfColumnBatch(X_1, y_1, w_1, m_1, q_1));
 
-      DMatrix dmat = new QuantileDMatrix(tables.iterator(), 0.0f, 8, 1);
+      DMatrix dmat = new QuantileDMatrix(tables.iterator(), 0.0f, 256, 1);
 
       float[] anchorLabel = convertFloatTofloat(label1, label2);
       float[] anchorWeight = convertFloatTofloat(weight1, weight2);
@@ -119,6 +131,7 @@ public void testCreateFromColumnDataIterator() throws XGBoostError {
       TestCase.assertTrue(Arrays.equals(anchorLabel, dmat.getLabel()));
       TestCase.assertTrue(Arrays.equals(anchorWeight, dmat.getWeight()));
       TestCase.assertTrue(Arrays.equals(anchorBaseMargin, dmat.getBaseMargin()));
+      TestCase.assertTrue(Arrays.equals(expectedGroup, dmat.getGroup()));
     }
   }
 
@@ -140,43 +153,43 @@ private float[] convertFloatTofloat(Float[]... datas) {
 
   @Test
   public void testMakingDMatrixViaArray() {
-//    Float[][] features1 = {
-//      {1.0f, 12.0f},
-//      {2.0f, 13.0f},
-//      null,
-//      {4.0f, null},
-//      {5.0f, 16.0f}
-//    };
-//
-//    Float[] label1 = {0.0f, 1.0f, 0.0f, 1.0f, 0.0f};
-//
-//    Table X1 = new Table.TestBuilder().column(features1).build();
-//    Table y1 = new Table.TestBuilder().column(label1).build();
-//
-//    ColumnVector t = X1.getColumn(0);
-//    ColumnView cv = t.getChildColumnView(0);
-//    //
-//    System.out.println("----");
-//
-//    Float[][] features2 = {
-//      {6.0f, 17.0f},
-//      {7.0f, 18.0f},
-//    };
-//    Float[] label2 = {0.0f, 1.0f, 0.0f, 1.0f, 0.0f};
-//    Table X2 = new Table.TestBuilder().column(features2).build();
-//    Table y2 = new Table.TestBuilder().column(label2).build();
-//
-//    List<ColumnBatch> tables = new LinkedList<>();
-//    tables.add(new CudfColumnBatch(X1, y1, null, null));
-//    tables.add(new CudfColumnBatch(X2, y2, null, null));
-//
-//    try {
-//      DMatrix dmat = new QuantileDMatrix(tables.iterator(), 0.0f, 8, 1);
-//    } catch (XGBoostError e) {
-//      throw new RuntimeException(e);
-//    }
-//
-//    System.out.println("--------------");
+    // Float[][] features1 = {
+    //   {1.0f, 12.0f},
+    //   {2.0f, 13.0f},
+    //   null,
+    //   {4.0f, null},
+    //   {5.0f, 16.0f}
+    // };
+    //
+    // Float[] label1 = {0.0f, 1.0f, 0.0f, 1.0f, 0.0f};
+    //
+    // Table X1 = new Table.TestBuilder().column(features1).build();
+    // Table y1 = new Table.TestBuilder().column(label1).build();
+    //
+    // ColumnVector t = X1.getColumn(0);
+    // ColumnView cv = t.getChildColumnView(0);
+    // //
+    // System.out.println("----");
+    //
+    // Float[][] features2 = {
+    //   {6.0f, 17.0f},
+    //   {7.0f, 18.0f},
+    // };
+    // Float[] label2 = {0.0f, 1.0f, 0.0f, 1.0f, 0.0f};
+    // Table X2 = new Table.TestBuilder().column(features2).build();
+    // Table y2 = new Table.TestBuilder().column(label2).build();
+    //
+    // List<ColumnBatch> tables = new LinkedList<>();
+    // tables.add(new CudfColumnBatch(X1, y1, null, null));
+    // tables.add(new CudfColumnBatch(X2, y2, null, null));
+    //
+    // try {
+    //   DMatrix dmat = new QuantileDMatrix(tables.iterator(), 0.0f, 8, 1);
+    // } catch (XGBoostError e) {
+    //   throw new RuntimeException(e);
+    // }
+    //
+    // System.out.println("--------------");
   }
 
diff --git a/jvm-packages/xgboost4j-spark-gpu/src/test/resources/binary.test.parquet b/jvm-packages/xgboost4j-spark-gpu/src/test/resources/binary.test.parquet
deleted file mode 100644
index 5897b6fadb2b7a9d7081e7c96752b414ddd301c4..0000000000000000000000000000000000000000
GIT binary patch
[binary parquet payload omitted]
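[Editor's note, not part of the patch: the expectedGroup arrays asserted in DMatrixTest above come from XGBoost's group-pointer encoding. A sorted qid column such as {1, 1, 7, 7, 19, 26} collapses into the cumulative boundaries {0, 2, 4, 5, 6}, where entry i is the row index at which the i-th query group starts and the final entry is the total row count. A minimal standalone Scala sketch of that conversion follows; the helper name qidsToGroupPtr is ours, not part of the library.]

    // Collapse a sorted qid column into cumulative group boundary pointers.
    // qidsToGroupPtr(Seq(1, 1, 7, 7, 19, 26)) == Seq(0, 2, 4, 5, 6); appending a
    // second batch (30, 30, 30, 40, 40) yields 0, 2, 4, 5, 6, 9, 11, matching
    // the expectedGroup values asserted in the tests above.
    def qidsToGroupPtr(qids: Seq[Int]): Seq[Int] = {
      require(qids == qids.sorted, "qid column must be sorted by query id")
      // A group starts wherever the qid value changes (or at row 0).
      val starts = qids.indices.filter(i => i == 0 || qids(i) != qids(i - 1))
      starts :+ qids.length
    }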
diff --git a/jvm-packages/xgboost4j-spark-gpu/src/test/resources/binary.train.parquet b/jvm-packages/xgboost4j-spark-gpu/src/test/resources/binary.train.parquet
deleted file mode 100644
index 780efdc13d363a1509b8faab61a8a1b9bb1c0615..0000000000000000000000000000000000000000
GIT binary patch
[binary parquet payload omitted]

diff --git a/jvm-packages/xgboost4j-spark-gpu/src/test/resources/multiclass.test.parquet b/jvm-packages/xgboost4j-spark-gpu/src/test/resources/multiclass.test.parquet
deleted file mode 100644
index b8347280f9935d131d9557dac06cdf2558b3b304..0000000000000000000000000000000000000000
GIT binary patch
[binary parquet payload omitted]

diff --git a/jvm-packages/xgboost4j-spark-gpu/src/test/resources/multiclass.train.parquet b/jvm-packages/xgboost4j-spark-gpu/src/test/resources/multiclass.train.parquet
deleted file mode 100644
index 066f31b0ffa3a3ea1b7a8be7d1494a62b246e032..0000000000000000000000000000000000000000
GIT binary patch
[binary parquet payload omitted]

diff --git a/jvm-packages/xgboost4j-spark-gpu/src/test/resources/regression.test.parquet b/jvm-packages/xgboost4j-spark-gpu/src/test/resources/regression.test.parquet
deleted file mode 100644
GIT binary patch
[binary parquet payload omitted]

diff --git a/jvm-packages/xgboost4j-spark-gpu/src/test/resources/regression.train.parquet b/jvm-packages/xgboost4j-spark-gpu/src/test/resources/regression.train.parquet
deleted file mode 100644
GIT binary patch
[binary parquet payload omitted]
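[Editor's note, not part of the patch: the deleted parquet fixtures above are replaced by synthetic datasets generated in TrainTestData.scala later in this patch and written to temporary parquet files at test time. The suites that follow all feed data through the widened five-argument CudfColumnBatch constructor; its call shape is sketched below in Scala as orientation. The table values are hypothetical placeholders, and every table other than the features may be null when that field is absent.]

    import ai.rapids.cudf.Table
    import ml.dmlc.xgboost4j.java.CudfColumnBatch
    import ml.dmlc.xgboost4j.scala.QuantileDMatrix

    // Hypothetical cuDF tables; the tests build theirs with Table.TestBuilder
    // or Table.readParquet.
    val featureTable: Table = ???   // required: one column per feature
    val labelTable: Table = ???     // optional, may be null
    val batch = new CudfColumnBatch(featureTable, labelTable,
      /* weight */ null, /* baseMargin */ null, /* qid */ null) // qid slot is new
    // QuantileDMatrix(iterator, missing, maxBin, nthread), as used in the suites.
    val qdm = new QuantileDMatrix(Seq(batch).iterator, Float.NaN, 256, 1)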
zj>0jLxka6P(JLiEW5r9p)z&5a`8O8`x?3g*s^lMv^Ydm4mIX}^7_DB;@5r0M53;He zsJ}fVF%4;yRF>`KnXkVlp0~h2(p&PAz$0L-!0V_+UE_W~L5A9SetfGD-#IW<^7*5H z*L1){99+{X@chF_B4=|*Fv3YgaxQ6=q|{JPu-r6R;!!Zj{Yssg#L}TsP{r*Q>{-l` z^zJmTOS;_2JJXm}_vGq4iO{B2l9hi+++v?AxMO!p;uO73lHhhi(6D%b`^OeX_t?Vg zI3%cqKeow1AotluoY*WBbo)6-Hm&?1KDk|+*W)`%;-GKpuG>-|u<&^=@+!C@X}W!z zFZNZbJ9X4f;Pl+Iu2(&kzlbds4gORoN%BtP5AK;HIZ}L0qP>YD2=a83nCfm8SH6%) z^3Q5Wel$xJPs`C2D`>uvtStH@czaadJ;O>~9LhZ=Ft}k zE+n>E()~zL6udH$Z{{>v&~(X2a`ruoF9{i5=kvKjoLOil{_Eo`!OwR2f;CKtyJzAg z$+OD}b?MqW1osS!#CBG8g53c+f~AIy;-f!%)LC@Qs5>#z+TG7jP24u>sraE?saR=1 zr$lEB3hpl3E3xn$A=q`QLE@=qAPCduNiLjFaBmW>kZ=-G#QDGLih*d=or|m%M~2RJ z&&ruDsed<*&*bqXFDLWre1ew?BvxX+*{th)xp}t)Q3b06O~G>oId?Q9csxMT7+_d8 z^2X1C2*+y@#K+6lD+B(l6P&0=B$9hsF9T%bvQVvh3lszbw0_ zN@dw~yuU2Fa7ks^U8%n;JGG>;>=5iP%eE?6Z|Og&zbx~PRF+NW-?9-$mHkdlIwqFB z5hqEV-S;i-(`EO5i{BW3*I!tEk9)Y^rk4D^*;MhfT<3bmNM+LLBSZvjy z@_O>2_TK{Tw_wz_VDz`Z;alMNHBgES4hfHBaSqDs8|kkcCVg?Bs_&?*%F>td*z!`H z)1$_Dr#4L0?du$Wop26o_?H>~nn{11^c-TbyM*8Y&}l4mkigZ+aEXTEAE*Zr&mqdB*n?rR8LX1z%tzT ztUs<*hLe23y)o{O8{L0&Z;YilnHTgFWyX-TIGKm^ z6lI2z;l5}6aht|Tal5=_KemV4ALrN~*Ta>KPU<~9MVS%5jP9STKW>){*YE|`J@z~9 z>yq~;c8`@3WwzB*lzCW2{GRnEo{$lHWkgP%r_>w6R5>?22TI*NLQijT?0nncCBafp z+J3R8?b7hrxP^d311;tSQu9D*xRrB64xCtXFhGq}0)o z(q?I0_z3fFNORZ3RjWpPFXjIC(oz4tbo75Ob@=b4j{mvz%jWR^te&%qr8oZXRUH`> zYx=hhOu4q)(Y9RE;UNhNBg0I?LL9g*qq+8D!W=`KM>~XvI*u7_ALin09~u@M&UNC3 dJJ`E~g@puL>8~_mvHthNQd45FCQ0A9{~rcN%{c%7 diff --git a/jvm-packages/xgboost4j-spark-gpu/src/test/scala/ml/dmlc/xgboost4j/scala/QuantileDMatrixSuite.scala b/jvm-packages/xgboost4j-spark-gpu/src/test/scala/ml/dmlc/xgboost4j/scala/QuantileDMatrixSuite.scala index 1c8b36af299d..ceebcfd41f7a 100644 --- a/jvm-packages/xgboost4j-spark-gpu/src/test/scala/ml/dmlc/xgboost4j/scala/QuantileDMatrixSuite.scala +++ b/jvm-packages/xgboost4j-spark-gpu/src/test/scala/ml/dmlc/xgboost4j/scala/QuantileDMatrixSuite.scala @@ -22,6 +22,7 @@ import ai.rapids.cudf.Table import org.scalatest.funsuite.AnyFunSuite import ml.dmlc.xgboost4j.java.CudfColumnBatch +import ml.dmlc.xgboost4j.scala.spark.Utils.withResource class QuantileDMatrixSuite extends AnyFunSuite { @@ -30,10 +31,14 @@ class QuantileDMatrixSuite extends AnyFunSuite { val label1 = Array[java.lang.Float](25f, 21f, 22f, 20f, 24f) val weight1 = Array[java.lang.Float](1.3f, 2.31f, 0.32f, 3.3f, 1.34f) val baseMargin1 = Array[java.lang.Float](1.2f, 0.2f, 1.3f, 2.4f, 3.5f) + val group1 = Array[java.lang.Integer](1, 1, 7, 7, 19, 26) val label2 = Array[java.lang.Float](9f, 5f, 4f, 10f, 12f) val weight2 = Array[java.lang.Float](3.0f, 1.3f, 3.2f, 0.3f, 1.34f) val baseMargin2 = Array[java.lang.Float](0.2f, 2.5f, 3.1f, 4.4f, 2.2f) + val group2 = Array[java.lang.Integer](30, 30, 30, 40, 40) + + val expectedGroup = Array(0, 2, 4, 5, 6, 9, 11) withResource(new Table.TestBuilder() .column(1.2f, null.asInstanceOf[java.lang.Float], 5.2f, 7.2f, 9.2f) @@ -42,21 +47,27 @@ class QuantileDMatrixSuite extends AnyFunSuite { withResource(new Table.TestBuilder().column(label1: _*).build) { y_0 => withResource(new Table.TestBuilder().column(weight1: _*).build) { w_0 => withResource(new Table.TestBuilder().column(baseMargin1: _*).build) { m_0 => - withResource(new Table.TestBuilder() - .column(11.2f, 11.2f, 15.2f, 17.2f, 19.2f.asInstanceOf[java.lang.Float]) - .column(1.2f, 1.4f, null.asInstanceOf[java.lang.Float], 12.6f, 10.10f).build) { X_1 => - withResource(new Table.TestBuilder().column(label2: _*).build) { y_1 => - withResource(new Table.TestBuilder().column(weight2: _*).build) { w_1 => - withResource(new 
Table.TestBuilder().column(baseMargin2: _*).build) { m_1 => - val batches = new ArrayBuffer[CudfColumnBatch]() - batches += new CudfColumnBatch(X_0, y_0, w_0, m_0) - batches += new CudfColumnBatch(X_1, y_1, w_1, m_1) - val dmatrix = new QuantileDMatrix(batches.toIterator, 0.0f, 8, 1) - assert(dmatrix.getLabel.sameElements(label1 ++ label2)) - assert(dmatrix.getWeight.sameElements(weight1 ++ weight2)) - assert(dmatrix.getBaseMargin.sameElements(baseMargin1 ++ baseMargin2)) + withResource(new Table.TestBuilder().column(group1: _*).build) { q_0 => + withResource(new Table.TestBuilder() + .column(11.2f, 11.2f, 15.2f, 17.2f, 19.2f.asInstanceOf[java.lang.Float]) + .column(1.2f, 1.4f, null.asInstanceOf[java.lang.Float], 12.6f, 10.10f).build) { + X_1 => + withResource(new Table.TestBuilder().column(label2: _*).build) { y_1 => + withResource(new Table.TestBuilder().column(weight2: _*).build) { w_1 => + withResource(new Table.TestBuilder().column(baseMargin2: _*).build) { m_1 => + withResource(new Table.TestBuilder().column(group2: _*).build) { q_2 => + val batches = new ArrayBuffer[CudfColumnBatch]() + batches += new CudfColumnBatch(X_0, y_0, w_0, m_0, q_0) + batches += new CudfColumnBatch(X_1, y_1, w_1, m_1, q_2) + val dmatrix = new QuantileDMatrix(batches.toIterator, 0.0f, 8, 1) + assert(dmatrix.getLabel.sameElements(label1 ++ label2)) + assert(dmatrix.getWeight.sameElements(weight1 ++ weight2)) + assert(dmatrix.getBaseMargin.sameElements(baseMargin1 ++ baseMargin2)) + assert(dmatrix.getGroup().sameElements(expectedGroup)) + } + } + } } - } } } } @@ -64,15 +75,4 @@ class QuantileDMatrixSuite extends AnyFunSuite { } } } - - /** Executes the provided code block and then closes the resource */ - private def withResource[T <: AutoCloseable, V](r: T)(block: T => V): V = { - try { - block(r) - } finally { - r.close() - } - } - } - diff --git a/jvm-packages/xgboost4j-spark-gpu/src/test/scala/ml/dmlc/xgboost4j/scala/spark/GpuXGBoostPluginSuite.scala b/jvm-packages/xgboost4j-spark-gpu/src/test/scala/ml/dmlc/xgboost4j/scala/spark/GpuXGBoostPluginSuite.scala index 9d2b41faadea..4b7e7e34b8ef 100644 --- a/jvm-packages/xgboost4j-spark-gpu/src/test/scala/ml/dmlc/xgboost4j/scala/spark/GpuXGBoostPluginSuite.scala +++ b/jvm-packages/xgboost4j-spark-gpu/src/test/scala/ml/dmlc/xgboost4j/scala/spark/GpuXGBoostPluginSuite.scala @@ -20,14 +20,66 @@ import java.io.File import scala.collection.mutable.ArrayBuffer -import ai.rapids.cudf.{CSVOptions, DType, Schema, Table} -import org.apache.spark.sql.SparkSession -import org.apache.spark.sql.types.{FloatType, StructField, StructType} +import ai.rapids.cudf.Table +import org.apache.spark.ml.linalg.DenseVector +import org.apache.spark.sql.{Dataset, SparkSession} +import ml.dmlc.xgboost4j.java.CudfColumnBatch +import ml.dmlc.xgboost4j.scala.{DMatrix, QuantileDMatrix, XGBoost => ScalaXGBoost} import ml.dmlc.xgboost4j.scala.rapids.spark.GpuTestSuite +import ml.dmlc.xgboost4j.scala.spark.Utils.withResource class GpuXGBoostPluginSuite extends GpuTestSuite { + test("params") { + withGpuSparkSession() { spark => + import spark.implicits._ + val df = Seq((1.0f, 2.0f, 1.0f, 2.0f, 0.0f, 0.0f), + (2.0f, 3.0f, 2.0f, 3.0f, 1.0f, 0.1f), + (3.0f, 4.0f, 5.0f, 6.0f, 0.0f, 0.1f), + (4.0f, 5.0f, 6.0f, 7.0f, 0.0f, 0.1f), + (5.0f, 6.0f, 7.0f, 8.0f, 0.0f, 0.1f) + ).toDF("c1", "c2", "weight", "margin", "label", "other") + val xgbParams: Map[String, Any] = Map( + "max_depth" -> 5, + "eta" -> 0.2, + "objective" -> "binary:logistic" + ) + val features = Array("c1", "c2") + val estimator = new 
XGBoostClassifier(xgbParams) + .setFeaturesCol(features) + .setMissing(0.2f) + .setAlpha(0.97) + .setLeafPredictionCol("leaf") + .setContribPredictionCol("contrib") + .setNumRound(1) + .setDevice("cuda") + + assert(estimator.getMaxDepth === 5) + assert(estimator.getEta === 0.2) + assert(estimator.getObjective === "binary:logistic") + assert(estimator.getFeaturesCols === features) + assert(estimator.getMissing === 0.2f) + assert(estimator.getAlpha === 0.97) + assert(estimator.getDevice === "cuda") + + estimator.setEta(0.66).setMaxDepth(7) + assert(estimator.getMaxDepth === 7) + assert(estimator.getEta === 0.66) + + val model = estimator.train(df) + assert(model.getMaxDepth === 7) + assert(model.getEta === 0.66) + assert(model.getObjective === "binary:logistic") + assert(model.getFeaturesCols === features) + assert(model.getMissing === 0.2f) + assert(model.getAlpha === 0.97) + assert(model.getLeafPredictionCol === "leaf") + assert(model.getContribPredictionCol === "contrib") + assert(model.getDevice === "cuda") + } + } + test("isEnabled") { def checkIsEnabled(spark: SparkSession, expected: Boolean): Unit = { import spark.implicits._ @@ -120,6 +172,7 @@ class GpuXGBoostPluginSuite extends GpuTestSuite { } } + // test distributed test("build RDD Watches") { withGpuSparkSession() { spark => import spark.implicits._ @@ -179,10 +232,44 @@ class GpuXGBoostPluginSuite extends GpuTestSuite { } } - test("build RDD Watches with Eval") { + // must set num worker to 1 + test("build RDD Watches with group") { withGpuSparkSession() { spark => import spark.implicits._ + val df = Seq( + (1.0f, 2.0f, 1.0f, 2.0f, 0.0f, 11, 0.0f), + (2.0f, 3.0f, 2.0f, 3.0f, 1.0f, 11, 0.1f), + (3.0f, 4.0f, 5.0f, 6.0f, 0.0f, 30, 0.1f), + (4.0f, 5.0f, 6.0f, 7.0f, 0.0f, 30, 0.1f), + (5.0f, 6.0f, 7.0f, 8.0f, 1.0f, 41, 0.1f) + ).toDF("c1", "c2", "weight", "margin", "label", "group", "other") + + val features = Array("c1", "c2") + val classifier = new XGBoostRanker() + .setNumWorkers(1) + .setGroupCol("group") + .setFeaturesCol(features) + .setDevice("cuda") + + val rdd = classifier.getPlugin.get.buildRddWatches(classifier, df) + val result = rdd.mapPartitions { iter => + val watches = iter.next() + Iterator.single(watches.datasets(0).getGroup) + }.collect() + + val groups: ArrayBuffer[Int] = ArrayBuffer.empty + + for (row <- result) { + groups.append(row: _*) + } + assert(groups.sorted === Array(0, 2, 4, 5).sorted) + } + } + + test("build RDD Watches with Eval") { + withGpuSparkSession() { spark => + import spark.implicits._ val train = Seq( (1.0f, 2.0f, 1.0f, 2.0f, 0.0f, 0.0f), (2.0f, 3.0f, 2.0f, 3.0f, 1.0f, 0.1f) @@ -244,18 +331,289 @@ class GpuXGBoostPluginSuite extends GpuTestSuite { } } + test("transformed schema") { + withGpuSparkSession() { spark => + import spark.implicits._ + val df = Seq( + (1.0f, 2.0f, 1.0f, 2.0f, 0.0f, 0.0f), + (2.0f, 3.0f, 2.0f, 3.0f, 1.0f, 0.1f), + (3.0f, 4.0f, 5.0f, 6.0f, 0.0f, 0.1f), + (4.0f, 5.0f, 6.0f, 7.0f, 0.0f, 0.1f), + (5.0f, 6.0f, 7.0f, 8.0f, 1.0f, 0.1f) + ).toDF("c1", "c2", "weight", "margin", "label", "other") + + val estimator = new XGBoostClassifier() + .setNumWorkers(1) + .setNumRound(2) + .setFeaturesCol(Array("c1", "c2")) + .setLabelCol("label") + .setDevice("cuda") + + assert(estimator.getPlugin.isDefined && estimator.getPlugin.get.isEnabled(df)) + + val out = estimator.train(df).transform(df) + // Transform should not discard the other columns of the transforming dataframe + Seq("c1", "c2", "weight", "margin", "label", "other").foreach { v => + assert(out.schema.names.contains(v)) + } 
+ + // Transform for XGBoostClassifier needs to add extra columns + Seq("rawPrediction", "probability", "prediction").foreach { v => + assert(out.schema.names.contains(v)) + } + assert(out.schema.names.length === 9) + + val out1 = estimator.setLeafPredictionCol("leaf").setContribPredictionCol("contrib") + .train(df) + .transform(df) + Seq("leaf", "contrib").foreach { v => + assert(out1.schema.names.contains(v)) + } + } + } - test("XGBoost-Spark should match xgboost4j") { + private def checkEqual(left: Array[Array[Float]], + right: Array[Array[Float]], + epsilon: Float = 1e-4f): Unit = { + assert(left.size === right.size) + left.zip(right).foreach { case (leftValue, rightValue) => + leftValue.zip(rightValue).foreach { case (l, r) => + assert(math.abs(l - r) < epsilon) + } + } + } + + Seq("binary:logistic", "multi:softprob").foreach { case objective => + test(s"$objective: XGBoost-Spark should match xgboost4j") { + withGpuSparkSession() { spark => + import spark.implicits._ + + val numRound = 100 + var xgboostParams: Map[String, Any] = Map( + "objective" -> objective, + "device" -> "cuda" + ) + + val (trainPath, testPath) = if (objective == "binary:logistic") { + (writeFile(Classification.train.toDF("label", "weight", "c1", "c2", "c3")), + writeFile(Classification.test.toDF("label", "weight", "c1", "c2", "c3"))) + } else { + xgboostParams = xgboostParams ++ Map("num_class" -> 6) + (writeFile(MultiClassification.train.toDF("label", "weight", "c1", "c2", "c3")), + writeFile(MultiClassification.test.toDF("label", "weight", "c1", "c2", "c3"))) + } + + val df = spark.read.parquet(trainPath) + val testdf = spark.read.parquet(testPath) + + val features = Array("c1", "c2", "c3") + val featuresIndices = features.map(df.schema.fieldIndex) + val label = "label" + + val classifier = new XGBoostClassifier(xgboostParams) + .setFeaturesCol(features) + .setLabelCol(label) + .setNumRound(numRound) + .setLeafPredictionCol("leaf") + .setContribPredictionCol("contrib") + .setDevice("cuda") + + val xgb4jModel = withResource(new GpuColumnBatch( + Table.readParquet(new File(trainPath)))) { batch => + val cb = new CudfColumnBatch(batch.select(featuresIndices), + batch.select(df.schema.fieldIndex(label)), null, null, null + ) + val qdm = new QuantileDMatrix(Seq(cb).iterator, classifier.getMissing, + classifier.getMaxBins, classifier.getNthread) + ScalaXGBoost.train(qdm, xgboostParams, numRound) + } + + val (xgb4jLeaf, xgb4jContrib, xgb4jProb, xgb4jRaw) = withResource(new GpuColumnBatch( + Table.readParquet(new File(testPath)))) { batch => + val cb = new CudfColumnBatch(batch.select(featuresIndices), null, null, null, null + ) + val qdm = new DMatrix(cb, classifier.getMissing, classifier.getNthread) + (xgb4jModel.predictLeaf(qdm), xgb4jModel.predictContrib(qdm), + xgb4jModel.predict(qdm), xgb4jModel.predict(qdm, outPutMargin = true)) + } + + val rows = classifier.train(df).transform(testdf).collect() + + // Check Leaf + val xgbSparkLeaf = rows.map(row => row.getAs[DenseVector]("leaf").toArray.map(_.toFloat)) + checkEqual(xgb4jLeaf, xgbSparkLeaf) + + // Check contrib + val xgbSparkContrib = rows.map(row => + row.getAs[DenseVector]("contrib").toArray.map(_.toFloat)) + checkEqual(xgb4jContrib, xgbSparkContrib) + + // Check probability + var xgbSparkProb = rows.map(row => + row.getAs[DenseVector]("probability").toArray.map(_.toFloat)) + if (objective == "binary:logistic") { + xgbSparkProb = xgbSparkProb.map(v => Array(v(1))) + } + checkEqual(xgb4jProb, xgbSparkProb) + + // Check raw + var xgbSparkRaw = rows.map(row => + 
row.getAs[DenseVector]("rawPrediction").toArray.map(_.toFloat)) + if (objective == "binary:logistic") { + xgbSparkRaw = xgbSparkRaw.map(v => Array(v(1))) + } + checkEqual(xgb4jRaw, xgbSparkRaw) + + } + } + } + + test(s"Regression: XGBoost-Spark should match xgboost4j") { + withGpuSparkSession() { spark => + import spark.implicits._ + + val trainPath = writeFile(Regression.train.toDF("label", "weight", "c1", "c2", "c3")) + val testPath = writeFile(Regression.test.toDF("label", "weight", "c1", "c2", "c3")) + + val df = spark.read.parquet(trainPath) + val testdf = spark.read.parquet(testPath) + + val features = Array("c1", "c2", "c3") + val featuresIndices = features.map(df.schema.fieldIndex) + val label = "label" + + val numRound = 100 + val xgboostParams: Map[String, Any] = Map( + "device" -> "cuda" + ) + + val regressor = new XGBoostRegressor(xgboostParams) + .setFeaturesCol(features) + .setLabelCol(label) + .setNumRound(numRound) + .setLeafPredictionCol("leaf") + .setContribPredictionCol("contrib") + .setDevice("cuda") + + val xgb4jModel = withResource(new GpuColumnBatch( + Table.readParquet(new File(trainPath)))) { batch => + val cb = new CudfColumnBatch(batch.select(featuresIndices), + batch.select(df.schema.fieldIndex(label)), null, null, null + ) + val qdm = new QuantileDMatrix(Seq(cb).iterator, regressor.getMissing, + regressor.getMaxBins, regressor.getNthread) + ScalaXGBoost.train(qdm, xgboostParams, numRound) + } + + val (xgb4jLeaf, xgb4jContrib, xgb4jPred) = withResource(new GpuColumnBatch( + Table.readParquet(new File(testPath)))) { batch => + val cb = new CudfColumnBatch(batch.select(featuresIndices), null, null, null, null + ) + val qdm = new DMatrix(cb, regressor.getMissing, regressor.getNthread) + (xgb4jModel.predictLeaf(qdm), xgb4jModel.predictContrib(qdm), + xgb4jModel.predict(qdm)) + } + + val rows = regressor.train(df).transform(testdf).collect() + + // Check Leaf + val xgbSparkLeaf = rows.map(row => row.getAs[DenseVector]("leaf").toArray.map(_.toFloat)) + checkEqual(xgb4jLeaf, xgbSparkLeaf) + + // Check contrib + val xgbSparkContrib = rows.map(row => + row.getAs[DenseVector]("contrib").toArray.map(_.toFloat)) + checkEqual(xgb4jContrib, xgbSparkContrib) + + // Check prediction + val xgbSparkPred = rows.map(row => + Array(row.getAs[Double]("prediction").toFloat)) + checkEqual(xgb4jPred, xgbSparkPred) + } + } + + test("Ranker: XGBoost-Spark should match xgboost4j") { withGpuSparkSession() { spark => + import spark.implicits._ - val cols = Array("c0", "c1", "c2", "c3", "c4", "c5") + val trainPath = writeFile(Ranking.train.toDF("label", "weight", "group", "c1", "c2", "c3")) + val testPath = writeFile(Ranking.test.toDF("label", "weight", "group", "c1", "c2", "c3")) + + val df = spark.read.parquet(trainPath) + val testdf = spark.read.parquet(testPath) + + val features = Array("c1", "c2", "c3") + val featuresIndices = features.map(df.schema.fieldIndex) val label = "label" + val group = "group" + + val numRound = 100 + val xgboostParams: Map[String, Any] = Map( + "device" -> "cuda", + "objective" -> "rank:ndcg" + ) + + val ranker = new XGBoostRanker(xgboostParams) + .setFeaturesCol(features) + .setLabelCol(label) + .setNumRound(numRound) + .setLeafPredictionCol("leaf") + .setContribPredictionCol("contrib") + .setGroupCol(group) + .setDevice("cuda") - val table = Table.readParquet(new File(getResourcePath("/binary.train.parquet"))) - val df = spark.read.parquet(getResourcePath("/binary.train.parquet")) + val xgb4jModel = withResource(new GpuColumnBatch( + Table.readParquet(new 
File(trainPath)))) { batch => + val cb = new CudfColumnBatch(batch.select(featuresIndices), + batch.select(df.schema.fieldIndex(label)), null, null, + batch.select(df.schema.fieldIndex(group))) + val qdm = new QuantileDMatrix(Seq(cb).iterator, ranker.getMissing, + ranker.getMaxBins, ranker.getNthread) + ScalaXGBoost.train(qdm, xgboostParams, numRound) + } + + val (xgb4jLeaf, xgb4jContrib, xgb4jPred) = withResource(new GpuColumnBatch( + Table.readParquet(new File(testPath)))) { batch => + val cb = new CudfColumnBatch(batch.select(featuresIndices), null, null, null, null + ) + val qdm = new DMatrix(cb, ranker.getMissing, ranker.getNthread) + (xgb4jModel.predictLeaf(qdm), xgb4jModel.predictContrib(qdm), + xgb4jModel.predict(qdm)) + } + + val rows = ranker.train(df).transform(testdf).collect() + + // Check Leaf + val xgbSparkLeaf = rows.map(row => row.getAs[DenseVector]("leaf").toArray.map(_.toFloat)) + checkEqual(xgb4jLeaf, xgbSparkLeaf) + + // Check contrib + val xgbSparkContrib = rows.map(row => + row.getAs[DenseVector]("contrib").toArray.map(_.toFloat)) + checkEqual(xgb4jContrib, xgbSparkContrib) + // Check prediction + val xgbSparkPred = rows.map(row => + Array(row.getAs[Double]("prediction").toFloat)) + checkEqual(xgb4jPred, xgbSparkPred) + } + } - df.show() + def writeFile(df: Dataset[_]): String = { + def listFiles(directory: String): Array[String] = { + val dir = new File(directory) + if (dir.exists && dir.isDirectory) { + dir.listFiles.filter(f => f.isFile && f.getName.startsWith("part-")).map(_.getName) + } else { + Array.empty[String] + } } + + val dir = createTmpFolder("gpu_").toAbsolutePath.toString + df.coalesce(1).write.parquet(s"$dir/data") + + val file = listFiles(s"$dir/data")(0) + s"$dir/data/$file" } + } diff --git a/jvm-packages/xgboost4j-spark-gpu/src/test/scala/ml/dmlc/xgboost4j/scala/spark/TrainTestData.scala b/jvm-packages/xgboost4j-spark-gpu/src/test/scala/ml/dmlc/xgboost4j/scala/spark/TrainTestData.scala new file mode 100644 index 000000000000..49c790fd0a00 --- /dev/null +++ b/jvm-packages/xgboost4j-spark-gpu/src/test/scala/ml/dmlc/xgboost4j/scala/spark/TrainTestData.scala @@ -0,0 +1,86 @@ +/* + Copyright (c) 2014-2024 by Contributors + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+ */
+
+package ml.dmlc.xgboost4j.scala.spark
+
+import scala.util.Random
+
+trait TrainTestData {
+
+  protected def generateClassificationDataset(
+      numRows: Int,
+      numClass: Int,
+      seed: Int = 1): Seq[(Int, Float, Float, Float, Float)] = {
+    val random = new Random()
+    random.setSeed(seed)
+    (1 to numRows).map { _ =>
+      val label = random.nextInt(numClass)
+      // label, weight, c1, c2, c3
+      (label, random.nextFloat().abs, random.nextGaussian().toFloat, random.nextGaussian().toFloat,
+        random.nextGaussian().toFloat)
+    }
+  }
+
+  protected def generateRegressionDataset(
+      numRows: Int,
+      seed: Int = 11): Seq[(Float, Float, Float, Float, Float)] = {
+    val random = new Random()
+    random.setSeed(seed)
+    (1 to numRows).map { _ =>
+      // label, weight, c1, c2, c3
+      (random.nextFloat(), random.nextFloat().abs, random.nextGaussian().toFloat,
+        random.nextGaussian().toFloat,
+        random.nextGaussian().toFloat)
+    }
+  }
+
+  protected def generateRankDataset(
+      numRows: Int,
+      numClass: Int,
+      maxGroup: Int = 12,
+      seed: Int = 99): Seq[(Int, Float, Int, Float, Float, Float)] = {
+    val random = new Random()
+    random.setSeed(seed)
+    (1 to numRows).map { _ =>
+      val group = random.nextInt(maxGroup)
+      // label, weight, group, c1, c2, c3
+      (random.nextInt(numClass), group.toFloat, group,
+        random.nextGaussian().toFloat,
+        random.nextGaussian().toFloat,
+        random.nextGaussian().toFloat)
+    }
+  }
+}
+
+object Classification extends TrainTestData {
+  val train = generateClassificationDataset(300, 2, 3)
+  val test = generateClassificationDataset(150, 2, 5)
+}
+
+object MultiClassification extends TrainTestData {
+  val train = generateClassificationDataset(300, 4, 11)
+  val test = generateClassificationDataset(150, 4, 12)
+}
+
+object Regression extends TrainTestData {
+  val train = generateRegressionDataset(300, 222)
+  val test = generateRegressionDataset(150, 223)
+}
+
+object Ranking extends TrainTestData {
+  // Pass the seed by name: the third positional parameter of
+  // generateRankDataset is maxGroup, not seed.
+  val train = generateRankDataset(300, 10, seed = 555)
+  val test = generateRankDataset(150, 10, seed = 556)
+}
diff --git a/jvm-packages/xgboost4j-spark-gpu/src/test/scala/ml/dmlc/xgboost4j/scala/spark/XXXXXSuite.scala b/jvm-packages/xgboost4j-spark-gpu/src/test/scala/ml/dmlc/xgboost4j/scala/spark/XXXXXSuite.scala
deleted file mode 100644
index f98c9614ab68..000000000000
--- a/jvm-packages/xgboost4j-spark-gpu/src/test/scala/ml/dmlc/xgboost4j/scala/spark/XXXXXSuite.scala
+++ /dev/null
@@ -1,95 +0,0 @@
-/*
- Copyright (c) 2021-2024 by Contributors
-
- Licensed under the Apache License, Version 2.0 (the "License");
- you may not use this file except in compliance with the License.
- You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
- */ - -package ml.dmlc.xgboost4j.scala.spark - -import org.scalatest.funsuite.AnyFunSuite - -import ml.dmlc.xgboost4j.scala.rapids.spark.GpuTestSuite - -class XXXXXSuite extends AnyFunSuite with GpuTestSuite { - - test("test Gpu XGBoostClassifierSuite") { - // Define the schema for the fake data - - withGpuSparkSession() { spark => - var df = spark.read.parquet("/home/bobwang/data/iris/parquet") - - df.sparkSession.conf.get("spark.rapids.sql.enabled") - - // Select the features and label columns - val labelCol = "class" - - val features = df.schema.names.filter(_ != labelCol) - - // df = df.withColumn("base_margin", lit(20)) - // .withColumn("weight", rand(1)) - - var Array(trainDf, validationDf) = df.randomSplit(Array(0.8, 0.2), seed = 1) - - // trainDf = trainDf.withColumn("validation", lit(false)) - // validationDf = validationDf.withColumn("validationDf", lit(true)) - - // df = trainDf.union(validationDf) - // - // // Assemble the feature columns into a single vector column - // val assembler = new VectorAssembler() - // .setInputCols(features) - // .setOutputCol("features") - // val dataset = assembler.transform(df) - - // val arrayInput = df.select(array(features.map(col(_)): _*).as("features"), - // col("label"), col("base_margin")) - - val est = new XGBoostClassifier() - .setNumWorkers(1) - .setNumRound(100) - // .setMaxDepth(3) - // .setWeightCol("weight") - // .setBaseMarginCol("base_margin") - .setFeaturesCol(features) - .setLabelCol(labelCol) - .setLeafPredictionCol("leaf") - .setContribPredictionCol("contrib") - .setDevice("cuda") - // .setEvalDataset(validationDf) - // .setValidationIndicatorCol("validation") - // .setPredictionCol("") - // .setRawPredictionCol("") - // .setProbabilityCol("xxxx") - // .setContribPredictionCol("contrb") - // .setLeafPredictionCol("leaf") - // val est = new XGBoostClassifier().setLabelCol(labelCol) - // est.fit(arrayInput) - // est.write.overwrite().save("/tmp/abcdef") - // val loadedEst = XGBoostClassifier.load("/tmp/abcdef") - // println(loadedEst.getNumRound) - // println(loadedEst.getMaxDepth) - - val model = est.fit(trainDf) - - val out = model.transform(df) - out.printSchema() - out.show(150) - // model.write.overwrite().save("/tmp/model/") - // val loadedModel = XGBoostClassificationModel.load("/tmp/model") - // println(loadedModel.getNumRound) - // println(loadedModel.getMaxDepth) - // model.transform(df).drop(features: _*).show(150, false) - } - - } -} diff --git a/jvm-packages/xgboost4j-spark/src/main/scala/ml/dmlc/xgboost4j/scala/spark/Utils.scala b/jvm-packages/xgboost4j-spark/src/main/scala/ml/dmlc/xgboost4j/scala/spark/Utils.scala index 6c9716089419..cae44ab9aef1 100644 --- a/jvm-packages/xgboost4j-spark/src/main/scala/ml/dmlc/xgboost4j/scala/spark/Utils.scala +++ b/jvm-packages/xgboost4j-spark/src/main/scala/ml/dmlc/xgboost4j/scala/spark/Utils.scala @@ -22,7 +22,7 @@ import org.json4s.{DefaultFormats, FullTypeHints, JField, JValue, NoTypeHints, T import ml.dmlc.xgboost4j.{LabeledPoint => XGBLabeledPoint} -object Utils { +private[scala] object Utils { private[spark] implicit class XGBLabeledPointFeatures( val labeledPoint: XGBLabeledPoint @@ -111,4 +111,14 @@ object Utils { val TRAIN_NAME = "train" val VALIDATION_NAME = "eval" + + + /** Executes the provided code block and then closes the resource */ + def withResource[T <: AutoCloseable, V](r: T)(block: T => V): V = { + try { + block(r) + } finally { + r.close() + } + } } diff --git a/jvm-packages/xgboost4j-spark/src/test/scala/ml/dmlc/xgboost4j/scala/spark/PerTest.scala 
b/jvm-packages/xgboost4j-spark/src/test/scala/ml/dmlc/xgboost4j/scala/spark/PerTest.scala index eebaae4306fd..49b50fcc469f 100644 --- a/jvm-packages/xgboost4j-spark/src/test/scala/ml/dmlc/xgboost4j/scala/spark/PerTest.scala +++ b/jvm-packages/xgboost4j-spark/src/test/scala/ml/dmlc/xgboost4j/scala/spark/PerTest.scala @@ -26,7 +26,7 @@ import org.scalatest.BeforeAndAfterEach import org.scalatest.funsuite.AnyFunSuite import ml.dmlc.xgboost4j.{LabeledPoint => XGBLabeledPoint} -import ml.dmlc.xgboost4j.scala.spark.Utils.XGBLabeledPointFeatures +import ml.dmlc.xgboost4j.scala.spark.Utils.{withResource, XGBLabeledPointFeatures} trait PerTest extends BeforeAndAfterEach { self: AnyFunSuite => @@ -103,15 +103,6 @@ trait PerTest extends BeforeAndAfterEach { } } - /** Executes the provided code block and then closes the resource */ - protected def withResource[T <: AutoCloseable, V](r: T)(block: T => V): V = { - try { - block(r) - } finally { - r.close() - } - } - def smallBinaryClassificationVector: DataFrame = ss.createDataFrame(sc.parallelize(Seq( (1.0, 0.5, 1.0, Vectors.dense(1.0, 2.0, 3.0)), (0.0, 0.4, -3.0, Vectors.dense(0.0, 0.0, 0.0)), diff --git a/jvm-packages/xgboost4j-spark/src/test/scala/ml/dmlc/xgboost4j/scala/spark/XGBoostClassifierSuite.scala b/jvm-packages/xgboost4j-spark/src/test/scala/ml/dmlc/xgboost4j/scala/spark/XGBoostClassifierSuite.scala index 32f3ee1bd568..dcd22009514e 100644 --- a/jvm-packages/xgboost4j-spark/src/test/scala/ml/dmlc/xgboost4j/scala/spark/XGBoostClassifierSuite.scala +++ b/jvm-packages/xgboost4j-spark/src/test/scala/ml/dmlc/xgboost4j/scala/spark/XGBoostClassifierSuite.scala @@ -112,6 +112,13 @@ class XGBoostClassifierSuite extends AnyFunSuite with PerTest with TmpFolderPerS assert(out.schema.names.contains("leaf")) assert(out.schema.names.contains("contrib")) + + val out1 = classifier.setLeafPredictionCol("leaf1") + .setContribPredictionCol("contrib1") + .train(trainDf).transform(trainDf) + + assert(out1.schema.names.contains("leaf1")) + assert(out1.schema.names.contains("contrib1")) } test("Supported objectives") { diff --git a/jvm-packages/xgboost4j-spark/src/test/scala/ml/dmlc/xgboost4j/scala/spark/XGBoostEstimatorSuite.scala b/jvm-packages/xgboost4j-spark/src/test/scala/ml/dmlc/xgboost4j/scala/spark/XGBoostEstimatorSuite.scala index 4a8b59741e63..614e93c8e8cf 100644 --- a/jvm-packages/xgboost4j-spark/src/test/scala/ml/dmlc/xgboost4j/scala/spark/XGBoostEstimatorSuite.scala +++ b/jvm-packages/xgboost4j-spark/src/test/scala/ml/dmlc/xgboost4j/scala/spark/XGBoostEstimatorSuite.scala @@ -32,408 +32,422 @@ import ml.dmlc.xgboost4j.scala.spark.Utils.TRAIN_NAME class XGBoostEstimatorSuite extends AnyFunSuite with PerTest with TmpFolderPerSuite { test("params") { + val df = smallBinaryClassificationVector val xgbParams: Map[String, Any] = Map( "max_depth" -> 5, "eta" -> 0.2, "objective" -> "binary:logistic" ) - val estimator = new XGBoostRegressor(xgbParams) - .setFeaturesCol("abc") + val estimator = new XGBoostClassifier(xgbParams) + .setFeaturesCol("features") .setMissing(0.2f) .setAlpha(0.97) + .setLeafPredictionCol("leaf") + .setContribPredictionCol("contrib") + .setNumRound(1) assert(estimator.getMaxDepth === 5) assert(estimator.getEta === 0.2) assert(estimator.getObjective === "binary:logistic") - assert(estimator.getFeaturesCol === "abc") + assert(estimator.getFeaturesCol === "features") assert(estimator.getMissing === 0.2f) assert(estimator.getAlpha === 0.97) estimator.setEta(0.66).setMaxDepth(7) assert(estimator.getMaxDepth === 7) assert(estimator.getEta === 
0.66) + + val model = estimator.train(df) + assert(model.getMaxDepth === 7) + assert(model.getEta === 0.66) + assert(model.getObjective === "binary:logistic") + assert(model.getFeaturesCol === "features") + assert(model.getMissing === 0.2f) + assert(model.getAlpha === 0.97) + assert(model.getLeafPredictionCol === "leaf") + assert(model.getContribPredictionCol === "contrib") } -test("nthread") { - val classifier = new XGBoostClassifier().setNthread(100) + test("nthread") { + val classifier = new XGBoostClassifier().setNthread(100) - intercept[IllegalArgumentException]( - classifier.validate(smallBinaryClassificationVector) - ) -} + intercept[IllegalArgumentException]( + classifier.validate(smallBinaryClassificationVector) + ) + } -test("RuntimeParameter") { - var runtimeParams = new XGBoostClassifier( - Map("device" -> "cpu")) - .getRuntimeParameters(true) - assert(!runtimeParams.runOnGpu) - - runtimeParams = new XGBoostClassifier( - Map("device" -> "cuda")).setNumWorkers(1).setNumRound(1) - .getRuntimeParameters(true) - assert(runtimeParams.runOnGpu) - - runtimeParams = new XGBoostClassifier( - Map("device" -> "cpu", "tree_method" -> "gpu_hist")).setNumWorkers(1).setNumRound(1) - .getRuntimeParameters(true) - assert(runtimeParams.runOnGpu) - - runtimeParams = new XGBoostClassifier( - Map("device" -> "cuda", "tree_method" -> "gpu_hist")).setNumWorkers(1).setNumRound(1) - .getRuntimeParameters(true) - assert(runtimeParams.runOnGpu) -} + test("RuntimeParameter") { + var runtimeParams = new XGBoostClassifier( + Map("device" -> "cpu")) + .getRuntimeParameters(true) + assert(!runtimeParams.runOnGpu) + + runtimeParams = new XGBoostClassifier( + Map("device" -> "cuda")).setNumWorkers(1).setNumRound(1) + .getRuntimeParameters(true) + assert(runtimeParams.runOnGpu) + + runtimeParams = new XGBoostClassifier( + Map("device" -> "cpu", "tree_method" -> "gpu_hist")).setNumWorkers(1).setNumRound(1) + .getRuntimeParameters(true) + assert(runtimeParams.runOnGpu) + + runtimeParams = new XGBoostClassifier( + Map("device" -> "cuda", "tree_method" -> "gpu_hist")).setNumWorkers(1).setNumRound(1) + .getRuntimeParameters(true) + assert(runtimeParams.runOnGpu) + } -test("test persistence of XGBoostClassifier and XGBoostClassificationModel " + - "using custom Eval and Obj") { - val trainingDF = buildDataFrame(Classification.train) - val testDM = new DMatrix(Classification.test.iterator) - - val paramMap = Map("eta" -> "0.1", "max_depth" -> "6", - "verbosity" -> "1", "objective" -> "binary:logistic") - - val xgbc = new XGBoostClassifier(paramMap) - .setCustomObj(new CustomObj(1)) - .setCustomEval(new EvalError) - .setNumRound(10) - .setNumWorkers(numWorkers) - - val xgbcPath = new File(tempDir.toFile, "xgbc").getPath - xgbc.write.overwrite().save(xgbcPath) - val xgbc2 = XGBoostClassifier.load(xgbcPath) - - assert(xgbc.getCustomObj.asInstanceOf[CustomObj].customParameter === 1) - assert(xgbc2.getCustomObj.asInstanceOf[CustomObj].customParameter === 1) - - val eval = new EvalError() - - val model = xgbc.fit(trainingDF) - val evalResults = eval.eval(model.nativeBooster.predict(testDM, outPutMargin = true), testDM) - assert(evalResults < 0.1) - val xgbcModelPath = new File(tempDir.toFile, "xgbcModel").getPath - model.write.overwrite.save(xgbcModelPath) - val model2 = XGBoostClassificationModel.load(xgbcModelPath) - assert(Arrays.equals(model.nativeBooster.toByteArray, model2.nativeBooster.toByteArray)) - - assert(model.getEta === model2.getEta) - assert(model.getNumRound === model2.getNumRound) - 
assert(model.getRawPredictionCol === model2.getRawPredictionCol) - val evalResults2 = eval.eval(model2.nativeBooster.predict(testDM, outPutMargin = true), testDM) - assert(evalResults === evalResults2) -} + test("test persistence of XGBoostClassifier and XGBoostClassificationModel " + + "using custom Eval and Obj") { + val trainingDF = buildDataFrame(Classification.train) + val testDM = new DMatrix(Classification.test.iterator) -test("Check for Spark encryption over-the-wire") { - val originalSslConfOpt = ss.conf.getOption("spark.ssl.enabled") - ss.conf.set("spark.ssl.enabled", true) + val paramMap = Map("eta" -> "0.1", "max_depth" -> "6", + "verbosity" -> "1", "objective" -> "binary:logistic") - val paramMap = Map("eta" -> "1", "max_depth" -> "2", "verbosity" -> "1", - "objective" -> "binary:logistic") - val training = smallBinaryClassificationVector + val xgbc = new XGBoostClassifier(paramMap) + .setCustomObj(new CustomObj(1)) + .setCustomEval(new EvalError) + .setNumRound(10) + .setNumWorkers(numWorkers) - withClue("xgboost-spark should throw an exception when spark.ssl.enabled = true but " + - "xgboost.spark.ignoreSsl != true") { - val thrown = intercept[Exception] { - new XGBoostClassifier(paramMap).setNumRound(2).setNumWorkers(numWorkers).fit(training) - } - assert(thrown.getMessage.contains("xgboost.spark.ignoreSsl") && - thrown.getMessage.contains("spark.ssl.enabled")) + val xgbcPath = new File(tempDir.toFile, "xgbc").getPath + xgbc.write.overwrite().save(xgbcPath) + val xgbc2 = XGBoostClassifier.load(xgbcPath) + + assert(xgbc.getCustomObj.asInstanceOf[CustomObj].customParameter === 1) + assert(xgbc2.getCustomObj.asInstanceOf[CustomObj].customParameter === 1) + + val eval = new EvalError() + + val model = xgbc.fit(trainingDF) + val evalResults = eval.eval(model.nativeBooster.predict(testDM, outPutMargin = true), testDM) + assert(evalResults < 0.1) + val xgbcModelPath = new File(tempDir.toFile, "xgbcModel").getPath + model.write.overwrite.save(xgbcModelPath) + val model2 = XGBoostClassificationModel.load(xgbcModelPath) + assert(Arrays.equals(model.nativeBooster.toByteArray, model2.nativeBooster.toByteArray)) + + assert(model.getEta === model2.getEta) + assert(model.getNumRound === model2.getNumRound) + assert(model.getRawPredictionCol === model2.getRawPredictionCol) + val evalResults2 = eval.eval(model2.nativeBooster.predict(testDM, outPutMargin = true), testDM) + assert(evalResults === evalResults2) } - // Confirm that this check can be overridden. - ss.conf.set("xgboost.spark.ignoreSsl", true) - new XGBoostClassifier(paramMap).setNumRound(2).setNumWorkers(numWorkers).fit(training) + test("Check for Spark encryption over-the-wire") { + val originalSslConfOpt = ss.conf.getOption("spark.ssl.enabled") + ss.conf.set("spark.ssl.enabled", true) + + val paramMap = Map("eta" -> "1", "max_depth" -> "2", "verbosity" -> "1", + "objective" -> "binary:logistic") + val training = smallBinaryClassificationVector + + withClue("xgboost-spark should throw an exception when spark.ssl.enabled = true but " + + "xgboost.spark.ignoreSsl != true") { + val thrown = intercept[Exception] { + new XGBoostClassifier(paramMap).setNumRound(2).setNumWorkers(numWorkers).fit(training) + } + assert(thrown.getMessage.contains("xgboost.spark.ignoreSsl") && + thrown.getMessage.contains("spark.ssl.enabled")) + } - originalSslConfOpt match { - case None => - ss.conf.unset("spark.ssl.enabled") - case Some(originalSslConf) => - ss.conf.set("spark.ssl.enabled", originalSslConf) + // Confirm that this check can be overridden. 
+ ss.conf.set("xgboost.spark.ignoreSsl", true) + new XGBoostClassifier(paramMap).setNumRound(2).setNumWorkers(numWorkers).fit(training) + + originalSslConfOpt match { + case None => + ss.conf.unset("spark.ssl.enabled") + case Some(originalSslConf) => + ss.conf.set("spark.ssl.enabled", originalSslConf) + } + ss.conf.unset("xgboost.spark.ignoreSsl") } - ss.conf.unset("xgboost.spark.ignoreSsl") -} -test("nthread configuration must be no larger than spark.task.cpus") { - val training = smallBinaryClassificationVector - val paramMap = Map("eta" -> "1", "max_depth" -> "2", "verbosity" -> "1", - "objective" -> "binary:logistic") - intercept[IllegalArgumentException] { - new XGBoostClassifier(paramMap) - .setNumWorkers(numWorkers) - .setNumRound(2) - .setNthread(sc.getConf.getInt("spark.task.cpus", 1) + 1) - .fit(training) + test("nthread configuration must be no larger than spark.task.cpus") { + val training = smallBinaryClassificationVector + val paramMap = Map("eta" -> "1", "max_depth" -> "2", "verbosity" -> "1", + "objective" -> "binary:logistic") + intercept[IllegalArgumentException] { + new XGBoostClassifier(paramMap) + .setNumWorkers(numWorkers) + .setNumRound(2) + .setNthread(sc.getConf.getInt("spark.task.cpus", 1) + 1) + .fit(training) + } } -} -test("preprocess dataset") { - val dataset = ss.createDataFrame(sc.parallelize(Seq( - (1.0, 0, 0.5, 1.0, Vectors.dense(1.0, 2.0, 3.0), "a"), - (0.0, 2, -0.5, 0.0, Vectors.dense(0.2, 1.2, 2.0), "b"), - (2.0, 2, -0.4, -2.1, Vectors.dense(0.5, 2.2, 1.7), "c") - ))).toDF("label", "group", "margin", "weight", "features", "other") - - val classifier = new XGBoostClassifier() - .setLabelCol("label") - .setFeaturesCol("features") - .setBaseMarginCol("margin") - .setWeightCol("weight") - - val (df, indices) = classifier.preprocess(dataset) - var schema = df.schema - assert(!schema.names.contains("group") && !schema.names.contains("other")) - assert(indices.labelId == schema.fieldIndex("label") && - indices.groupId.isEmpty && - indices.marginId.get == schema.fieldIndex("margin") && - indices.weightId.get == schema.fieldIndex("weight") && - indices.featureId.get == schema.fieldIndex("features") && - indices.featureIds.isEmpty) - - classifier.setWeightCol("") - val (df1, indices1) = classifier.preprocess(dataset) - schema = df1.schema - Seq("weight", "group", "other").foreach(v => assert(!schema.names.contains(v))) - assert(indices1.labelId == schema.fieldIndex("label") && - indices1.groupId.isEmpty && - indices1.marginId.get == schema.fieldIndex("margin") && - indices1.weightId.isEmpty && - indices1.featureId.get == schema.fieldIndex("features") && - indices1.featureIds.isEmpty) -} + test("preprocess dataset") { + val dataset = ss.createDataFrame(sc.parallelize(Seq( + (1.0, 0, 0.5, 1.0, Vectors.dense(1.0, 2.0, 3.0), "a"), + (0.0, 2, -0.5, 0.0, Vectors.dense(0.2, 1.2, 2.0), "b"), + (2.0, 2, -0.4, -2.1, Vectors.dense(0.5, 2.2, 1.7), "c") + ))).toDF("label", "group", "margin", "weight", "features", "other") + + val classifier = new XGBoostClassifier() + .setLabelCol("label") + .setFeaturesCol("features") + .setBaseMarginCol("margin") + .setWeightCol("weight") + + val (df, indices) = classifier.preprocess(dataset) + var schema = df.schema + assert(!schema.names.contains("group") && !schema.names.contains("other")) + assert(indices.labelId == schema.fieldIndex("label") && + indices.groupId.isEmpty && + indices.marginId.get == schema.fieldIndex("margin") && + indices.weightId.get == schema.fieldIndex("weight") && + indices.featureId.get == 
schema.fieldIndex("features") && + indices.featureIds.isEmpty) + + classifier.setWeightCol("") + val (df1, indices1) = classifier.preprocess(dataset) + schema = df1.schema + Seq("weight", "group", "other").foreach(v => assert(!schema.names.contains(v))) + assert(indices1.labelId == schema.fieldIndex("label") && + indices1.groupId.isEmpty && + indices1.marginId.get == schema.fieldIndex("margin") && + indices1.weightId.isEmpty && + indices1.featureId.get == schema.fieldIndex("features") && + indices1.featureIds.isEmpty) + } -test("to XGBoostLabeledPoint RDD") { - val data = Array( - Array(1.0, 2.0, 3.0, 4.0, 5.0), - Array(0.0, 0.0, 0.0, 0.0, 2.0), - Array(12.0, 13.0, 14.0, 14.0, 15.0), - Array(20.5, 21.2, 0.0, 0.0, 2.0) - ) - val dataset = ss.createDataFrame(sc.parallelize(Seq( - (1.0, 0, 0.5, 1.0, Vectors.dense(data(0)), "a"), - (2.0, 2, -0.5, 0.0, Vectors.dense(data(1)).toSparse, "b"), - (3.0, 2, -0.5, 0.0, Vectors.dense(data(2)), "b"), - (4.0, 2, -0.4, -2.1, Vectors.dense(data(3)), "c") - ))).toDF("label", "group", "margin", "weight", "features", "other") - - val classifier = new XGBoostClassifier() - .setLabelCol("label") - .setFeaturesCol("features") - .setWeightCol("weight") - .setNumWorkers(2) - - val (df, indices) = classifier.preprocess(dataset) - val rdd = classifier.toXGBLabeledPoint(df, indices) - val result = rdd.collect().sortBy(x => x.label) - - assert(result.length == data.length) - - def toArray(index: Int): Array[Float] = { - val labelPoint = result(index) - if (labelPoint.indices != null) { - Vectors.sparse(labelPoint.size, - labelPoint.indices, - labelPoint.values.map(_.toDouble)).toArray.map(_.toFloat) - } else { - labelPoint.values + test("to XGBoostLabeledPoint RDD") { + val data = Array( + Array(1.0, 2.0, 3.0, 4.0, 5.0), + Array(0.0, 0.0, 0.0, 0.0, 2.0), + Array(12.0, 13.0, 14.0, 14.0, 15.0), + Array(20.5, 21.2, 0.0, 0.0, 2.0) + ) + val dataset = ss.createDataFrame(sc.parallelize(Seq( + (1.0, 0, 0.5, 1.0, Vectors.dense(data(0)), "a"), + (2.0, 2, -0.5, 0.0, Vectors.dense(data(1)).toSparse, "b"), + (3.0, 2, -0.5, 0.0, Vectors.dense(data(2)), "b"), + (4.0, 2, -0.4, -2.1, Vectors.dense(data(3)), "c") + ))).toDF("label", "group", "margin", "weight", "features", "other") + + val classifier = new XGBoostClassifier() + .setLabelCol("label") + .setFeaturesCol("features") + .setWeightCol("weight") + .setNumWorkers(2) + + val (df, indices) = classifier.preprocess(dataset) + val rdd = classifier.toXGBLabeledPoint(df, indices) + val result = rdd.collect().sortBy(x => x.label) + + assert(result.length == data.length) + + def toArray(index: Int): Array[Float] = { + val labelPoint = result(index) + if (labelPoint.indices != null) { + Vectors.sparse(labelPoint.size, + labelPoint.indices, + labelPoint.values.map(_.toDouble)).toArray.map(_.toFloat) + } else { + labelPoint.values + } } - } - assert(result(0).label === 1.0f && result(0).baseMargin.isNaN && - result(0).weight === 1.0f && toArray(0) === data(0).map(_.toFloat)) - assert(result(1).label == 2.0f && result(1).baseMargin.isNaN && - result(1).weight === 0.0f && toArray(1) === data(1).map(_.toFloat)) - assert(result(2).label === 3.0f && result(2).baseMargin.isNaN && - result(2).weight == 0.0f && toArray(2) === data(2).map(_.toFloat)) - assert(result(3).label === 4.0f && result(3).baseMargin.isNaN && - result(3).weight === -2.1f && toArray(3) === data(3).map(_.toFloat)) -} + assert(result(0).label === 1.0f && result(0).baseMargin.isNaN && + result(0).weight === 1.0f && toArray(0) === data(0).map(_.toFloat)) + assert(result(1).label 
== 2.0f && result(1).baseMargin.isNaN && + result(1).weight === 0.0f && toArray(1) === data(1).map(_.toFloat)) + assert(result(2).label === 3.0f && result(2).baseMargin.isNaN && + result(2).weight == 0.0f && toArray(2) === data(2).map(_.toFloat)) + assert(result(3).label === 4.0f && result(3).baseMargin.isNaN && + result(3).weight === -2.1f && toArray(3) === data(3).map(_.toFloat)) + } -Seq((Float.NaN, 2), (0.0f, 7 + 2), (15.0f, 1 + 2), (10101011.0f, 0 + 2)).foreach { - case (missing, expectedMissingValue) => - test(s"to RDD watches with missing $missing") { - val data = Array( - Array(1.0, 2.0, 3.0, 4.0, 5.0), - Array(1.0, Float.NaN, 0.0, 0.0, 2.0), - Array(12.0, 13.0, Float.NaN, 14.0, 15.0), - Array(0.0, 0.0, 0.0, 0.0, 0.0) - ) - val dataset = ss.createDataFrame(sc.parallelize(Seq( - (1.0, 0, 0.5, 1.0, Vectors.dense(data(0)), "a"), - (2.0, 2, -0.5, 0.0, Vectors.dense(data(1)).toSparse, "b"), - (3.0, 3, -0.5, 0.0, Vectors.dense(data(2)), "b"), - (4.0, 4, -0.4, -2.1, Vectors.dense(data(3)), "c") - ))).toDF("label", "group", "margin", "weight", "features", "other") - - val classifier = new XGBoostClassifier() - .setLabelCol("label") - .setFeaturesCol("features") - .setWeightCol("weight") - .setBaseMarginCol("margin") - .setMissing(missing) - .setNumWorkers(2) - - val (df, indices) = classifier.preprocess(dataset) - val rdd = classifier.toRdd(df, indices) - val result = rdd.mapPartitions { iter => - if (iter.hasNext) { - val watches = iter.next() - val size = watches.size - val trainDM = watches.toMap(TRAIN_NAME) - val rowNum = trainDM.rowNum - val labels = trainDM.getLabel - val weight = trainDM.getWeight - val margins = trainDM.getBaseMargin - val nonMissing = trainDM.nonMissingNum - watches.delete() - Iterator.single((size, rowNum, labels, weight, margins, nonMissing)) - } else { - Iterator.empty + Seq((Float.NaN, 2), (0.0f, 7 + 2), (15.0f, 1 + 2), (10101011.0f, 0 + 2)).foreach { + case (missing, expectedMissingValue) => + test(s"to RDD watches with missing $missing") { + val data = Array( + Array(1.0, 2.0, 3.0, 4.0, 5.0), + Array(1.0, Float.NaN, 0.0, 0.0, 2.0), + Array(12.0, 13.0, Float.NaN, 14.0, 15.0), + Array(0.0, 0.0, 0.0, 0.0, 0.0) + ) + val dataset = ss.createDataFrame(sc.parallelize(Seq( + (1.0, 0, 0.5, 1.0, Vectors.dense(data(0)), "a"), + (2.0, 2, -0.5, 0.0, Vectors.dense(data(1)).toSparse, "b"), + (3.0, 3, -0.5, 0.0, Vectors.dense(data(2)), "b"), + (4.0, 4, -0.4, -2.1, Vectors.dense(data(3)), "c") + ))).toDF("label", "group", "margin", "weight", "features", "other") + + val classifier = new XGBoostClassifier() + .setLabelCol("label") + .setFeaturesCol("features") + .setWeightCol("weight") + .setBaseMarginCol("margin") + .setMissing(missing) + .setNumWorkers(2) + + val (df, indices) = classifier.preprocess(dataset) + val rdd = classifier.toRdd(df, indices) + val result = rdd.mapPartitions { iter => + if (iter.hasNext) { + val watches = iter.next() + val size = watches.size + val trainDM = watches.toMap(TRAIN_NAME) + val rowNum = trainDM.rowNum + val labels = trainDM.getLabel + val weight = trainDM.getWeight + val margins = trainDM.getBaseMargin + val nonMissing = trainDM.nonMissingNum + watches.delete() + Iterator.single((size, rowNum, labels, weight, margins, nonMissing)) + } else { + Iterator.empty + } + }.collect() + + val labels: ArrayBuffer[Float] = ArrayBuffer.empty + val weight: ArrayBuffer[Float] = ArrayBuffer.empty + val margins: ArrayBuffer[Float] = ArrayBuffer.empty + var nonMissingValues = 0L + var totalRows = 0L + + for (row <- result) { + assert(row._1 === 1) + 
totalRows = totalRows + row._2 + labels.append(row._3: _*) + weight.append(row._4: _*) + margins.append(row._5: _*) + nonMissingValues = nonMissingValues + row._6 } - }.collect() - - val labels: ArrayBuffer[Float] = ArrayBuffer.empty - val weight: ArrayBuffer[Float] = ArrayBuffer.empty - val margins: ArrayBuffer[Float] = ArrayBuffer.empty - var nonMissingValues = 0L - var totalRows = 0L - - for (row <- result) { - assert(row._1 === 1) - totalRows = totalRows + row._2 - labels.append(row._3: _*) - weight.append(row._4: _*) - margins.append(row._5: _*) - nonMissingValues = nonMissingValues + row._6 + assert(totalRows === 4) + assert(nonMissingValues === data.size * data(0).length - expectedMissingValue) + assert(labels.toArray.sorted === Array(1.0f, 2.0f, 3.0f, 4.0f).sorted) + assert(weight.toArray.sorted === Array(0.0f, 0.0f, 1.0f, -2.1f).sorted) + assert(margins.toArray.sorted === Array(-0.5f, -0.5f, -0.4f, 0.5f).sorted) } - assert(totalRows === 4) - assert(nonMissingValues === data.size * data(0).length - expectedMissingValue) - assert(labels.toArray.sorted === Array(1.0f, 2.0f, 3.0f, 4.0f).sorted) - assert(weight.toArray.sorted === Array(0.0f, 0.0f, 1.0f, -2.1f).sorted) - assert(margins.toArray.sorted === Array(-0.5f, -0.5f, -0.4f, 0.5f).sorted) - } -} + } -test("to RDD watches with eval") { - val trainData = Array( - Array(-1.0, -2.0, -3.0, -4.0, -5.0), - Array(2.0, 2.0, 2.0, 3.0, -2.0), - Array(-12.0, -13.0, -14.0, -14.0, -15.0), - Array(-20.5, -21.2, 0.0, 0.0, 2.0) - ) - val trainDataset = ss.createDataFrame(sc.parallelize(Seq( - (11.0, 0, 0.15, 11.0, Vectors.dense(trainData(0)), "a"), - (12.0, 12, -0.15, 10.0, Vectors.dense(trainData(1)).toSparse, "b"), - (13.0, 12, -0.15, 10.0, Vectors.dense(trainData(2)), "b"), - (14.0, 12, -0.14, -12.1, Vectors.dense(trainData(3)), "c") - ))).toDF("label", "group", "margin", "weight", "features", "other") - val evalData = Array( - Array(1.0, 2.0, 3.0, 4.0, 5.0), - Array(0.0, 0.0, 0.0, 0.0, 2.0), - Array(12.0, 13.0, 14.0, 14.0, 15.0), - Array(20.5, 21.2, 0.0, 0.0, 2.0) - ) - val evalDataset = ss.createDataFrame(sc.parallelize(Seq( - (1.0, 0, 0.5, 1.0, Vectors.dense(evalData(0)), "a"), - (2.0, 2, -0.5, 0.0, Vectors.dense(evalData(1)).toSparse, "b"), - (3.0, 2, -0.5, 0.0, Vectors.dense(evalData(2)), "b"), - (4.0, 2, -0.4, -2.1, Vectors.dense(evalData(3)), "c") - ))).toDF("label", "group", "margin", "weight", "features", "other") - - val classifier = new XGBoostClassifier() - .setLabelCol("label") - .setFeaturesCol("features") - .setWeightCol("weight") - .setBaseMarginCol("margin") - .setEvalDataset(evalDataset) - .setNumWorkers(2) - - val (df, indices) = classifier.preprocess(trainDataset) - val rdd = classifier.toRdd(df, indices) - val result = rdd.mapPartitions { iter => - if (iter.hasNext) { - val watches = iter.next() - val size = watches.size - val evalDM = watches.toMap(Utils.VALIDATION_NAME) - val rowNum = evalDM.rowNum - val labels = evalDM.getLabel - val weight = evalDM.getWeight - val margins = evalDM.getBaseMargin - watches.delete() - Iterator.single((size, rowNum, labels, weight, margins)) - } else { - Iterator.empty + test("to RDD watches with eval") { + val trainData = Array( + Array(-1.0, -2.0, -3.0, -4.0, -5.0), + Array(2.0, 2.0, 2.0, 3.0, -2.0), + Array(-12.0, -13.0, -14.0, -14.0, -15.0), + Array(-20.5, -21.2, 0.0, 0.0, 2.0) + ) + val trainDataset = ss.createDataFrame(sc.parallelize(Seq( + (11.0, 0, 0.15, 11.0, Vectors.dense(trainData(0)), "a"), + (12.0, 12, -0.15, 10.0, Vectors.dense(trainData(1)).toSparse, "b"), + (13.0, 12, -0.15, 
10.0, Vectors.dense(trainData(2)), "b"), + (14.0, 12, -0.14, -12.1, Vectors.dense(trainData(3)), "c") + ))).toDF("label", "group", "margin", "weight", "features", "other") + val evalData = Array( + Array(1.0, 2.0, 3.0, 4.0, 5.0), + Array(0.0, 0.0, 0.0, 0.0, 2.0), + Array(12.0, 13.0, 14.0, 14.0, 15.0), + Array(20.5, 21.2, 0.0, 0.0, 2.0) + ) + val evalDataset = ss.createDataFrame(sc.parallelize(Seq( + (1.0, 0, 0.5, 1.0, Vectors.dense(evalData(0)), "a"), + (2.0, 2, -0.5, 0.0, Vectors.dense(evalData(1)).toSparse, "b"), + (3.0, 2, -0.5, 0.0, Vectors.dense(evalData(2)), "b"), + (4.0, 2, -0.4, -2.1, Vectors.dense(evalData(3)), "c") + ))).toDF("label", "group", "margin", "weight", "features", "other") + + val classifier = new XGBoostClassifier() + .setLabelCol("label") + .setFeaturesCol("features") + .setWeightCol("weight") + .setBaseMarginCol("margin") + .setEvalDataset(evalDataset) + .setNumWorkers(2) + + val (df, indices) = classifier.preprocess(trainDataset) + val rdd = classifier.toRdd(df, indices) + val result = rdd.mapPartitions { iter => + if (iter.hasNext) { + val watches = iter.next() + val size = watches.size + val evalDM = watches.toMap(Utils.VALIDATION_NAME) + val rowNum = evalDM.rowNum + val labels = evalDM.getLabel + val weight = evalDM.getWeight + val margins = evalDM.getBaseMargin + watches.delete() + Iterator.single((size, rowNum, labels, weight, margins)) + } else { + Iterator.empty + } + }.collect() + + val labels: ArrayBuffer[Float] = ArrayBuffer.empty + val weight: ArrayBuffer[Float] = ArrayBuffer.empty + val margins: ArrayBuffer[Float] = ArrayBuffer.empty + + var totalRows = 0L + for (row <- result) { + assert(row._1 === 2) + totalRows = totalRows + row._2 + labels.append(row._3: _*) + weight.append(row._4: _*) + margins.append(row._5: _*) } - }.collect() - - val labels: ArrayBuffer[Float] = ArrayBuffer.empty - val weight: ArrayBuffer[Float] = ArrayBuffer.empty - val margins: ArrayBuffer[Float] = ArrayBuffer.empty - - var totalRows = 0L - for (row <- result) { - assert(row._1 === 2) - totalRows = totalRows + row._2 - labels.append(row._3: _*) - weight.append(row._4: _*) - margins.append(row._5: _*) + assert(totalRows === 4) + assert(labels.toArray.sorted === Array(1.0f, 2.0f, 3.0f, 4.0f).sorted) + assert(weight.toArray.sorted === Array(0.0f, 0.0f, 1.0f, -2.1f).sorted) + assert(margins.toArray.sorted === Array(-0.5f, -0.5f, -0.4f, 0.5f).sorted) } - assert(totalRows === 4) - assert(labels.toArray.sorted === Array(1.0f, 2.0f, 3.0f, 4.0f).sorted) - assert(weight.toArray.sorted === Array(0.0f, 0.0f, 1.0f, -2.1f).sorted) - assert(margins.toArray.sorted === Array(-0.5f, -0.5f, -0.4f, 0.5f).sorted) -} -test("XGBoost-Spark model format should match xgboost4j") { - val trainingDF = buildDataFrame(MultiClassification.train) - - Seq(new XGBoostClassifier()).foreach { est => - est.setNumRound(5) - val model = est.fit(trainingDF) - - // test json - val modelPath = new File(tempDir.toFile, "xgbc").getPath - model.write.overwrite().option("format", "json").save(modelPath) - val nativeJsonModelPath = new File(tempDir.toFile, "nativeModel.json").getPath - model.nativeBooster.saveModel(nativeJsonModelPath) - assert(compareTwoFiles(new File(modelPath, "data/model").getPath, - nativeJsonModelPath)) - - // test ubj - val modelUbjPath = new File(tempDir.toFile, "xgbcUbj").getPath - model.write.overwrite().save(modelUbjPath) - val nativeUbjModelPath = new File(tempDir.toFile, "nativeModel.ubj").getPath - model.nativeBooster.saveModel(nativeUbjModelPath) - assert(compareTwoFiles(new 
File(modelUbjPath, "data/model").getPath,
-    nativeUbjModelPath))
-
-  // json file should be indifferent with ubj file
-  val modelJsonPath = new File(tempDir.toFile, "xgbcJson").getPath
-  model.write.overwrite().option("format", "json").save(modelJsonPath)
-  val nativeUbjModelPath1 = new File(tempDir.toFile, "nativeModel1.ubj").getPath
-  model.nativeBooster.saveModel(nativeUbjModelPath1)
-  assert(!compareTwoFiles(new File(modelJsonPath, "data/model").getPath,
-    nativeUbjModelPath1))
+  test("XGBoost-Spark model format should match xgboost4j") {
+    val trainingDF = buildDataFrame(MultiClassification.train)
+
+    Seq(new XGBoostClassifier()).foreach { est =>
+      est.setNumRound(5)
+      val model = est.fit(trainingDF)
+
+      // test json
+      val modelPath = new File(tempDir.toFile, "xgbc").getPath
+      model.write.overwrite().option("format", "json").save(modelPath)
+      val nativeJsonModelPath = new File(tempDir.toFile, "nativeModel.json").getPath
+      model.nativeBooster.saveModel(nativeJsonModelPath)
+      assert(compareTwoFiles(new File(modelPath, "data/model").getPath,
+        nativeJsonModelPath))
+
+      // test ubj
+      val modelUbjPath = new File(tempDir.toFile, "xgbcUbj").getPath
+      model.write.overwrite().save(modelUbjPath)
+      val nativeUbjModelPath = new File(tempDir.toFile, "nativeModel.ubj").getPath
+      model.nativeBooster.saveModel(nativeUbjModelPath)
+      assert(compareTwoFiles(new File(modelUbjPath, "data/model").getPath,
+        nativeUbjModelPath))
+
+      // the json file should differ from the ubj file
+      val modelJsonPath = new File(tempDir.toFile, "xgbcJson").getPath
+      model.write.overwrite().option("format", "json").save(modelJsonPath)
+      val nativeUbjModelPath1 = new File(tempDir.toFile, "nativeModel1.ubj").getPath
+      model.nativeBooster.saveModel(nativeUbjModelPath1)
+      assert(!compareTwoFiles(new File(modelJsonPath, "data/model").getPath,
+        nativeUbjModelPath1))
+    }
+  }
 
-test("native json model file should store feature_name and feature_type") {
-  val featureNames = (1 to 33).map(idx => s"feature_${idx}").toArray
-  val featureTypes = (1 to 33).map(idx => "q").toArray
-  val trainingDF = buildDataFrame(MultiClassification.train)
-  val xgb = new XGBoostClassifier()
-    .setNumWorkers(numWorkers)
-    .setFeatureNames(featureNames)
-    .setFeatureTypes(featureTypes)
-    .setNumRound(2)
-  val model = xgb.fit(trainingDF)
-  val modelStr = new String(model.nativeBooster.toByteArray("json"))
-  val jsonModel = parseJson(modelStr)
-  implicit val formats: Formats = DefaultFormats
-  val featureNamesInModel = (jsonModel \ "learner" \ "feature_names").extract[List[String]]
-  val featureTypesInModel = (jsonModel \ "learner" \ "feature_types").extract[List[String]]
-  assert(featureNamesInModel.length == 33)
-  assert(featureTypesInModel.length == 33)
-  assert(featureNames sameElements featureNamesInModel)
-  assert(featureTypes sameElements featureTypesInModel)
-}
+  test("native json model file should store feature_name and feature_type") {
+    val featureNames = (1 to 33).map(idx => s"feature_${idx}").toArray
+    val featureTypes = (1 to 33).map(idx => "q").toArray
+    val trainingDF = buildDataFrame(MultiClassification.train)
+    val xgb = new XGBoostClassifier()
+      .setNumWorkers(numWorkers)
+      .setFeatureNames(featureNames)
+      .setFeatureTypes(featureTypes)
+      .setNumRound(2)
+    val model = xgb.fit(trainingDF)
+    val modelStr = new String(model.nativeBooster.toByteArray("json"))
+    val jsonModel = parseJson(modelStr)
+    implicit val formats: Formats = DefaultFormats
+    val featureNamesInModel = (jsonModel \ "learner" \ "feature_names").extract[List[String]]
+    val featureTypesInModel = (jsonModel \ "learner" \ "feature_types").extract[List[String]]
+    assert(featureNamesInModel.length == 33)
+    assert(featureTypesInModel.length == 33)
+    assert(featureNames sameElements featureNamesInModel)
+    assert(featureTypes sameElements featureTypesInModel)
+  }
 }
diff --git a/jvm-packages/xgboost4j/src/main/java/ml/dmlc/xgboost4j/java/DMatrix.java b/jvm-packages/xgboost4j/src/main/java/ml/dmlc/xgboost4j/java/DMatrix.java
index 7bb8279c12b4..3fa3c692fcb5 100644
--- a/jvm-packages/xgboost4j/src/main/java/ml/dmlc/xgboost4j/java/DMatrix.java
+++ b/jvm-packages/xgboost4j/src/main/java/ml/dmlc/xgboost4j/java/DMatrix.java
@@ -1,5 +1,5 @@
 /*
- Copyright (c) 2014-2023 by Contributors
+ Copyright (c) 2014-2024 by Contributors
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -28,18 +28,10 @@ public class DMatrix {
 
   protected long handle = 0;
 
-  /**
-   * sparse matrix type (CSR or CSC)
-   */
-  public static enum SparseType {
-    CSR,
-    CSC;
-  }
-
   /**
    * Create DMatrix from iterator.
    *
-   * @param iter The data iterator of mini batch to provide the data.
+   * @param iter      The data iterator of mini batch to provide the data.
    * @param cacheInfo Cache path information, used for external memory setting, can be null.
    * @throws XGBoostError
    */
@@ -50,9 +42,9 @@ public DMatrix(Iterator<LabeledPoint> iter, String cacheInfo) throws XGBoostErro
   /**
    * Create DMatrix from iterator.
    *
-   * @param iter The data iterator of mini batch to provide the data.
+   * @param iter      The data iterator of mini batch to provide the data.
    * @param cacheInfo Cache path information, used for external memory setting, can be null.
-   * @param missing the missing value
+   * @param missing   the missing value
    * @throws XGBoostError
    */
   public DMatrix(Iterator<LabeledPoint> iter,
@@ -87,10 +79,11 @@ public DMatrix(String dataPath) throws XGBoostError {
 
   /**
    * Create DMatrix from Sparse matrix in CSR/CSC format.
+   *
    * @param headers The row index of the matrix.
    * @param indices The indices of presenting entries.
-   * @param data The data content.
-   * @param st Type of sparsity.
+   * @param data    The data content.
+   * @param st      Type of sparsity.
    * @throws XGBoostError
    */
   @Deprecated
@@ -101,12 +94,13 @@ public DMatrix(long[] headers, int[] indices, float[] data,
 
   /**
    * Create DMatrix from Sparse matrix in CSR/CSC format.
-   * @param headers The row index of the matrix.
-   * @param indices The indices of presenting entries.
-   * @param data The data content.
-   * @param st Type of sparsity.
-   * @param shapeParam when st is CSR, it specifies the column number, otherwise it is taken as
-   * row number
+   *
+   * @param headers    The row index of the matrix.
+   * @param indices    The indices of presenting entries.
+   * @param data       The data content.
+   * @param st         Type of sparsity.
+ * @param shapeParam when st is CSR, it specifies the column number, otherwise it is taken as + * row number * @throws XGBoostError */ public DMatrix(long[] headers, int[] indices, float[] data, DMatrix.SparseType st, @@ -136,7 +130,6 @@ public DMatrix(long[] headers, int[] indices, float[] data, DMatrix.SparseType s * @param nrow number of rows * @param ncol number of columns * @throws XGBoostError native error - * * @deprecated Please specify the missing value explicitly using * {@link DMatrix(float[], int, int, float)} */ @@ -159,9 +152,10 @@ public DMatrix(BigDenseMatrix matrix) throws XGBoostError { /** * create DMatrix from dense matrix - * @param data data values - * @param nrow number of rows - * @param ncol number of columns + * + * @param data data values + * @param nrow number of rows + * @param ncol number of columns * @param missing the specified value to represent the missing value */ public DMatrix(float[] data, int nrow, int ncol, float missing) throws XGBoostError { @@ -172,13 +166,14 @@ public DMatrix(float[] data, int nrow, int ncol, float missing) throws XGBoostEr /** * create DMatrix from dense matrix - * @param matrix instance of BigDenseMatrix + * + * @param matrix instance of BigDenseMatrix * @param missing the specified value to represent the missing value */ public DMatrix(BigDenseMatrix matrix, float missing) throws XGBoostError { long[] out = new long[1]; XGBoostJNI.checkCall(XGBoostJNI.XGDMatrixCreateFromMatRef(matrix.address, matrix.nrow, - matrix.ncol, missing, out)); + matrix.ncol, missing, out)); handle = out[0]; } @@ -191,10 +186,11 @@ protected DMatrix(long handle) { /** * Create the normal DMatrix from column array interface - * @param columnBatch the XGBoost ColumnBatch to provide the cuda array interface + * + * @param columnBatch the XGBoost ColumnBatch to provide the array interface * of feature columns - * @param missing missing value - * @param nthread threads number + * @param missing missing value + * @param nthread threads number * @throws XGBoostError */ public DMatrix(ColumnBatch columnBatch, float missing, int nthread) throws XGBoostError { @@ -204,41 +200,35 @@ public DMatrix(ColumnBatch columnBatch, float missing, int nthread) throws XGBoo throw new XGBoostError("Expecting non-empty feature columns' array interface"); } XGBoostJNI.checkCall(XGBoostJNI.XGDMatrixCreateFromArrayInterfaceColumns( - json, missing, nthread, out)); + json, missing, nthread, out)); handle = out[0]; } /** - * Set label of DMatrix from cuda array interface - * - * @param column the XGBoost Column to provide the cuda array interface - * of label column - * @throws XGBoostError native error + * flatten a mat to array */ - public void setLabel(Column column) throws XGBoostError { - setXGBDMatrixInfo("label", column.toJson()); - } + private static float[] flatten(float[][] mat) { + int size = 0; + for (float[] array : mat) size += array.length; + float[] result = new float[size]; + int pos = 0; + for (float[] ar : mat) { + System.arraycopy(ar, 0, result, pos, ar.length); + pos += ar.length; + } - /** - * Set weight of DMatrix from cuda array interface - * - * @param column the XGBoost Column to provide the cuda array interface - * of weight column - * @throws XGBoostError native error - */ - public void setWeight(Column column) throws XGBoostError { - setXGBDMatrixInfo("weight", column.toJson()); + return result; } /** - * Set base margin of DMatrix from cuda array interface + * Set query id of DMatrix from array interface * - * @param column the XGBoost Column to 
provide the cuda array interface - * of base margin column + * @param column the XGBoost Column to provide the array interface + * of query id column * @throws XGBoostError native error */ - public void setBaseMargin(Column column) throws XGBoostError { - setXGBDMatrixInfo("base_margin", column.toJson()); + public void setQueryId(Column column) throws XGBoostError { + setXGBDMatrixInfo("qid", column.toJson()); } private void setXGBDMatrixInfo(String type, String json) throws XGBoostError { @@ -272,17 +262,9 @@ private String[] getXGBDMatrixFeatureInfo(String type) throws XGBoostError { return outValue[0]; } - /** - * Set feature names - * @param values feature names to be set - * @throws XGBoostError - */ - public void setFeatureNames(String[] values) throws XGBoostError { - setXGBDMatrixFeatureInfo("feature_name", values); - } - /** * Get feature names + * * @return an array of feature names to be returned * @throws XGBoostError */ @@ -291,16 +273,18 @@ public String[] getFeatureNames() throws XGBoostError { } /** - * Set feature types - * @param values feature types to be set + * Set feature names + * + * @param values feature names to be set * @throws XGBoostError */ - public void setFeatureTypes(String[] values) throws XGBoostError { - setXGBDMatrixFeatureInfo("feature_type", values); + public void setFeatureNames(String[] values) throws XGBoostError { + setXGBDMatrixFeatureInfo("feature_name", values); } /** * Get feature types + * * @return an array of feature types to be returned * @throws XGBoostError */ @@ -309,46 +293,23 @@ public String[] getFeatureTypes() throws XGBoostError { } /** - * set label of dmatrix + * Set feature types * - * @param labels labels - * @throws XGBoostError native error + * @param values feature types to be set + * @throws XGBoostError */ - public void setLabel(float[] labels) throws XGBoostError { - XGBoostJNI.checkCall(XGBoostJNI.XGDMatrixSetFloatInfo(handle, "label", labels)); + public void setFeatureTypes(String[] values) throws XGBoostError { + setXGBDMatrixFeatureInfo("feature_type", values); } /** - * set weight of each instance + * Get group sizes of DMatrix * - * @param weights weights + * @return group size as array * @throws XGBoostError native error */ - public void setWeight(float[] weights) throws XGBoostError { - XGBoostJNI.checkCall(XGBoostJNI.XGDMatrixSetFloatInfo(handle, "weight", weights)); - } - - /** - * Set base margin (initial prediction). - * - * The margin must have the same number of elements as the number of - * rows in this matrix. - */ - public void setBaseMargin(float[] baseMargin) throws XGBoostError { - if (baseMargin.length != rowNum()) { - throw new IllegalArgumentException(String.format( - "base margin must have exactly %s elements, got %s", - rowNum(), baseMargin.length)); - } - - XGBoostJNI.checkCall(XGBoostJNI.XGDMatrixSetFloatInfo(handle, "base_margin", baseMargin)); - } - - /** - * Set base margin (initial prediction). 
- */ - public void setBaseMargin(float[][] baseMargin) throws XGBoostError { - setBaseMargin(flatten(baseMargin)); + public int[] getGroup() throws XGBoostError { + return getIntInfo("group_ptr"); } /** @@ -361,16 +322,6 @@ public void setGroup(int[] group) throws XGBoostError { XGBoostJNI.checkCall(XGBoostJNI.XGDMatrixSetUIntInfo(handle, "group", group)); } - /** - * Get group sizes of DMatrix - * - * @throws XGBoostError native error - * @return group size as array - */ - public int[] getGroup() throws XGBoostError { - return getIntInfo("group_ptr"); - } - /** * Set query ids (used for ranking) * @@ -403,6 +354,27 @@ public float[] getLabel() throws XGBoostError { return getFloatInfo("label"); } + /** + * Set label of DMatrix from array interface + * + * @param column the XGBoost Column to provide the array interface + * of label column + * @throws XGBoostError native error + */ + public void setLabel(Column column) throws XGBoostError { + setXGBDMatrixInfo("label", column.toJson()); + } + + /** + * set label of dmatrix + * + * @param labels labels + * @throws XGBoostError native error + */ + public void setLabel(float[] labels) throws XGBoostError { + XGBoostJNI.checkCall(XGBoostJNI.XGDMatrixSetFloatInfo(handle, "label", labels)); + } + /** * get weight of the DMatrix * @@ -413,6 +385,27 @@ public float[] getWeight() throws XGBoostError { return getFloatInfo("weight"); } + /** + * Set weight of DMatrix from array interface + * + * @param column the XGBoost Column to provide the array interface + * of weight column + * @throws XGBoostError native error + */ + public void setWeight(Column column) throws XGBoostError { + setXGBDMatrixInfo("weight", column.toJson()); + } + + /** + * set weight of each instance + * + * @param weights weights + * @throws XGBoostError native error + */ + public void setWeight(float[] weights) throws XGBoostError { + XGBoostJNI.checkCall(XGBoostJNI.XGDMatrixSetFloatInfo(handle, "weight", weights)); + } + /** * Get base margin of the DMatrix. */ @@ -420,6 +413,40 @@ public float[] getBaseMargin() throws XGBoostError { return getFloatInfo("base_margin"); } + /** + * Set base margin of DMatrix from array interface + * + * @param column the XGBoost Column to provide the array interface + * of base margin column + * @throws XGBoostError native error + */ + public void setBaseMargin(Column column) throws XGBoostError { + setXGBDMatrixInfo("base_margin", column.toJson()); + } + + /** + * Set base margin (initial prediction). + *

+ * The margin must have the same number of elements as the number of + * rows in this matrix. + */ + public void setBaseMargin(float[] baseMargin) throws XGBoostError { + if (baseMargin.length != rowNum()) { + throw new IllegalArgumentException(String.format( + "base margin must have exactly %s elements, got %s", + rowNum(), baseMargin.length)); + } + + XGBoostJNI.checkCall(XGBoostJNI.XGDMatrixSetFloatInfo(handle, "base_margin", baseMargin)); + } + + /** + * Set base margin (initial prediction). + */ + public void setBaseMargin(float[][] baseMargin) throws XGBoostError { + setBaseMargin(flatten(baseMargin)); + } + /** * Slice the DMatrix and return a new DMatrix that only contains `rowIndex`. * @@ -473,22 +500,6 @@ public long getHandle() { return handle; } - /** - * flatten a mat to array - */ - private static float[] flatten(float[][] mat) { - int size = 0; - for (float[] array : mat) size += array.length; - float[] result = new float[size]; - int pos = 0; - for (float[] ar : mat) { - System.arraycopy(ar, 0, result, pos, ar.length); - pos += ar.length; - } - - return result; - } - @Override protected void finalize() { dispose(); @@ -500,4 +511,12 @@ public synchronized void dispose() { handle = 0; } } + + /** + * sparse matrix type (CSR or CSC) + */ + public enum SparseType { + CSR, + CSC + } } diff --git a/jvm-packages/xgboost4j/src/main/scala/ml/dmlc/xgboost4j/scala/DMatrix.scala b/jvm-packages/xgboost4j/src/main/scala/ml/dmlc/xgboost4j/scala/DMatrix.scala index 3aaaeda0c894..294107f082fa 100644 --- a/jvm-packages/xgboost4j/src/main/scala/ml/dmlc/xgboost4j/scala/DMatrix.scala +++ b/jvm-packages/xgboost4j/src/main/scala/ml/dmlc/xgboost4j/scala/DMatrix.scala @@ -1,5 +1,5 @@ /* - Copyright (c) 2014-2023 by Contributors + Copyright (c) 2014-2024 by Contributors Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -33,13 +33,13 @@ class DMatrix private[scala](private[scala] val jDMatrix: JDMatrix) { } /** - * init DMatrix from Iterator of LabeledPoint - * - * @param dataIter An iterator of LabeledPoint - * @param cacheInfo Cache path information, used for external memory setting, null by default. - * @param missing Which value will be treated as the missing value - * @throws XGBoostError native error - */ + * init DMatrix from Iterator of LabeledPoint + * + * @param dataIter An iterator of LabeledPoint + * @param cacheInfo Cache path information, used for external memory setting, null by default. 
+   * @param missing   Which value will be treated as the missing value
+   * @throws XGBoostError native error
+   */
   def this(dataIter: Iterator[LabeledPoint],
            cacheInfo: String = null,
            missing: Float = Float.NaN) {
@@ -63,12 +63,12 @@ class DMatrix private[scala](private[scala] val jDMatrix: JDMatrix) {
   /**
    * create DMatrix from sparse matrix
    *
-   * @param headers index to headers (rowHeaders for CSR or colHeaders for CSC)
-   * @param indices Indices (colIndexs for CSR or rowIndexs for CSC)
-   * @param data non zero values (sequence by row for CSR or by col for CSC)
-   * @param st sparse matrix type (CSR or CSC)
+   * @param headers    index to headers (rowHeaders for CSR or colHeaders for CSC)
+   * @param indices    Indices (colIndices for CSR or rowIndices for CSC)
+   * @param data       non-zero values (sequence by row for CSR or by col for CSC)
+   * @param st         sparse matrix type (CSR or CSC)
    * @param shapeParam when st is CSR, it specifies the column number, otherwise it is taken as
-   * row number
+   *                   row number
    */
   @throws(classOf[XGBoostError])
   def this(headers: Array[Long], indices: Array[Int], data: Array[Float], st: JDMatrix.SparseType,
@@ -79,14 +79,14 @@ class DMatrix private[scala](private[scala] val jDMatrix: JDMatrix) {
   /**
    * create DMatrix from sparse matrix
    *
-   * @param headers index to headers (rowHeaders for CSR or colHeaders for CSC)
-   * @param indices Indices (colIndexs for CSR or rowIndexs for CSC)
-   * @param data non zero values (sequence by row for CSR or by col for CSC)
-   * @param st sparse matrix type (CSR or CSC)
+   * @param headers    index to headers (rowHeaders for CSR or colHeaders for CSC)
+   * @param indices    Indices (colIndices for CSR or rowIndices for CSC)
+   * @param data       non-zero values (sequence by row for CSR or by col for CSC)
+   * @param st         sparse matrix type (CSR or CSC)
    * @param shapeParam when st is CSR, it specifies the column number, otherwise it is taken as
-   * row number
-   * @param missing missing value
-   * @param nthread The number of threads used for constructing DMatrix
+   *                   row number
+   * @param missing    missing value
+   * @param nthread    The number of threads used for constructing DMatrix
    */
   @throws(classOf[XGBoostError])
   def this(headers: Array[Long], indices: Array[Int], data: Array[Float], st: JDMatrix.SparseType,
@@ -96,10 +96,11 @@ class DMatrix private[scala](private[scala] val jDMatrix: JDMatrix) {
 
   /**
    * Create the normal DMatrix from column array interface
+   *
    * @param columnBatch the XGBoost ColumnBatch to provide the cuda array interface
    *                    of feature columns
-   * @param missing missing value
-   * @param nthread The number of threads used for constructing DMatrix
+   * @param missing     missing value
+   * @param nthread     The number of threads used for constructing DMatrix
    */
   @throws(classOf[XGBoostError])
   def this(columnBatch: ColumnBatch, missing: Float, nthread: Int) {
@@ -122,9 +123,9 @@ class DMatrix private[scala](private[scala] val jDMatrix: JDMatrix) {
   /**
    * create DMatrix from dense matrix
    *
-   * @param data data values
-   * @param nrow number of rows
-   * @param ncol number of columns
+   * @param data    data values
+   * @param nrow    number of rows
+   * @param ncol    number of columns
    * @param missing the specified value to represent the missing value
    */
   @throws(classOf[XGBoostError])
@@ -218,8 +219,17 @@ class DMatrix private[scala](private[scala] val jDMatrix: JDMatrix) {
     jDMatrix.setBaseMargin(column)
   }
 
+  /**
+   * set query id of dmatrix from column array interface
+   */
+  @throws(classOf[XGBoostError])
+  def setQueryId(column: Column): Unit = {
+    jDMatrix.setQueryId(column)
+  }
+
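The Column-based setQueryId above is the last JVM piece the ranking path needs: the GPU plugin hands the qid table to CudfColumnBatch as its new fifth argument, and the native iterator forwards the "qid" field of the batch's array-interface JSON to the proxy DMatrix. A minimal sketch of that wiring (the Table arguments featureTable, labelTable and qidTable are hypothetical device-resident inputs, not part of this patch; the missing/maxBin/nthread values are placeholders):

    import ai.rapids.cudf.Table
    import ml.dmlc.xgboost4j.java.CudfColumnBatch
    import ml.dmlc.xgboost4j.scala.QuantileDMatrix

    // Sketch only: builds a QuantileDMatrix carrying query ids for ranking.
    def buildRankingQdm(featureTable: Table, labelTable: Table, qidTable: Table): QuantileDMatrix = {
      // weight and baseMargin are omitted (null); qidTable is the new fifth argument.
      val cb = new CudfColumnBatch(featureTable, labelTable, null, null, qidTable)
      // missing = NaN, maxBin = 256, nthread = 1 are assumed defaults for the sketch.
      new QuantileDMatrix(Seq(cb).iterator, Float.NaN, 256, 1)
    }
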
   /**
    * set feature names
+   *
    * @param values feature names
    * @throws ml.dmlc.xgboost4j.java.XGBoostError
    */
@@ -230,6 +240,7 @@ class DMatrix private[scala](private[scala] val jDMatrix: JDMatrix) {
 
   /**
    * set feature types
+   *
    * @param values feature types
    * @throws ml.dmlc.xgboost4j.java.XGBoostError
    */
@@ -278,6 +289,7 @@ class DMatrix private[scala](private[scala] val jDMatrix: JDMatrix) {
 
   /**
    * get feature names
+   *
    * @throws ml.dmlc.xgboost4j.java.XGBoostError
    * @return
    */
@@ -288,6 +300,7 @@ class DMatrix private[scala](private[scala] val jDMatrix: JDMatrix) {
 
   /**
    * get feature types
+   *
    * @throws ml.dmlc.xgboost4j.java.XGBoostError
    * @return
    */
diff --git a/jvm-packages/xgboost4j/src/native/xgboost4j-gpu.cu b/jvm-packages/xgboost4j/src/native/xgboost4j-gpu.cu
index b784b21ec5f6..a705751b1583 100644
--- a/jvm-packages/xgboost4j/src/native/xgboost4j-gpu.cu
+++ b/jvm-packages/xgboost4j/src/native/xgboost4j-gpu.cu
@@ -104,7 +104,8 @@ void CopyInterface(std::vector> &interface_arr,
   }
 }
 
-void CopyMetaInfo(Json *p_interface, dh::device_vector<float> *out, cudaStream_t stream) {
+template <typename T>
+void CopyMetaInfo(Json *p_interface, dh::device_vector<T> *out, cudaStream_t stream) {
   auto &j_interface = *p_interface;
   CHECK_EQ(get<Array>(j_interface).size(), 1);
   auto object = get<Object>(get<Array>(j_interface)[0]);
@@ -151,9 +152,11 @@ class DataIteratorProxy {
   std::vector<std::unique_ptr<dh::device_vector<float>>> labels_;
   std::vector<std::unique_ptr<dh::device_vector<float>>> weights_;
   std::vector<std::unique_ptr<dh::device_vector<float>>> base_margins_;
+  std::vector<std::unique_ptr<dh::device_vector<int>>> qids_;
   std::vector<Json> label_interfaces_;
   std::vector<Json> weight_interfaces_;
   std::vector<Json> margin_interfaces_;
+  std::vector<Json> qid_interfaces_;
 
   size_t it_{0};
   size_t n_batches_{0};
@@ -220,6 +223,16 @@ class DataIteratorProxy {
       Json::Dump(basemargin, &str);
       XGDMatrixSetInfoFromInterface(proxy_, "base_margin", str.c_str());
     }
+
+    if (json_map.find("qid") != json_map.cend()) {
+      Json qid = json_interface["qid"];
+      qids_.emplace_back(new dh::device_vector<int>);
+      CopyMetaInfo(&qid, qids_.back().get(), copy_stream_);
+      qid_interfaces_.emplace_back(qid);
+
+      Json::Dump(qid, &str);
+      XGDMatrixSetInfoFromInterface(proxy_, "qid", str.c_str());
+    }
   }
 
   void CloseJvmBatch() {
@@ -337,6 +350,12 @@ class DataIteratorProxy {
       XGDMatrixSetInfoFromInterface(proxy_, "base_margin", str.c_str());
     }
 
+    if (n_batches_ == this->qid_interfaces_.size()) {
+      auto const &qid = this->qid_interfaces_.at(it_);
+      Json::Dump(qid, &str);
+      XGDMatrixSetInfoFromInterface(proxy_, "qid", str.c_str());
+    }
+
     // Data
     auto const &json_interface = host_columns_.at(it_)->interfaces;
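Taken together, the pieces in this patch surface as a one-step ranking workflow on the Spark side. A hedged usage sketch follows; the DataFrames and the column names ("label", "group", "c1".."c3") are assumptions mirroring the Ranking test data generated by TrainTestData above:

    import org.apache.spark.sql.DataFrame
    import ml.dmlc.xgboost4j.scala.spark.XGBoostRanker

    // Sketch only: trains a GPU ranker and scores a test set.
    def rankOnGpu(trainDf: DataFrame, testDf: DataFrame): DataFrame = {
      val ranker = new XGBoostRanker(Map("objective" -> "rank:ndcg"))
        .setFeaturesCol(Array("c1", "c2", "c3"))
        .setLabelCol("label")
        .setGroupCol("group") // rows sharing a group id form one query; forwarded as "qid"
        .setNumRound(100)
        .setDevice("cuda")    // routes training through the GPU plugin and QuantileDMatrix
      ranker.fit(trainDf).transform(testDf)
    }

The same shape applies to XGBoostRegressor by dropping setGroupCol, as exercised by the regression test in GpuXGBoostPluginSuite above.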