From 800445142832d42e45226305b4f167de727296a4 Mon Sep 17 00:00:00 2001 From: "Boris G. Tsirkin" Date: Tue, 2 May 2023 15:39:35 +0200 Subject: [PATCH] Scala 2.13 support. --- .github/workflows/jvm_tests.yml | 10 +++++++ jvm-packages/README.md | 29 ++++++++++++++++++- jvm-packages/pom.xml | 25 +++++++++++++--- jvm-packages/xgboost4j-example/pom.xml | 5 ++-- .../example/flink/DistTrainWithFlink.scala | 13 +++++---- jvm-packages/xgboost4j-flink/pom.xml | 4 ++- jvm-packages/xgboost4j-gpu/pom.xml | 9 +++--- jvm-packages/xgboost4j-spark-gpu/pom.xml | 7 +++-- jvm-packages/xgboost4j-spark/pom.xml | 7 +++-- jvm-packages/xgboost4j-tester/generate_pom.py | 4 +-- jvm-packages/xgboost4j/pom.xml | 9 +++--- .../ml/dmlc/xgboost4j/scala/EvalTrait.scala | 2 +- .../dmlc/xgboost4j/scala/ObjectiveTrait.scala | 2 +- .../ml/dmlc/xgboost4j/scala/XGBoost.scala | 21 ++++++++------ tests/ci_build/build_jvm_packages.sh | 9 +++++- 15 files changed, 114 insertions(+), 42 deletions(-) diff --git a/.github/workflows/jvm_tests.yml b/.github/workflows/jvm_tests.yml index a2d8bb69aa93..410bcc011798 100644 --- a/.github/workflows/jvm_tests.yml +++ b/.github/workflows/jvm_tests.yml @@ -75,3 +75,13 @@ jobs: if: matrix.os == 'ubuntu-latest' # Distributed training doesn't work on Windows env: RABIT_MOCK: ON + + + - name: Build and Test XGBoost4J with scala 2.13 + run: | + rm -rfv build/ + cd jvm-packages + mvn -B test -Pdefault,scala-2.13 + if: matrix.os == 'ubuntu-latest' # Distributed training doesn't work on Windows + env: + RABIT_MOCK: ON diff --git a/jvm-packages/README.md b/jvm-packages/README.md index c4c8898dd4ed..239464342d8a 100644 --- a/jvm-packages/README.md +++ b/jvm-packages/README.md @@ -36,6 +36,19 @@ XGBoost4J, XGBoost4J-Spark, etc. in maven repository is compiled with g++-4.8.5. latest_version_num ``` +or +``` + + ml.dmlc + xgboost4j_2.13 + latest_version_num + + + ml.dmlc + xgboost4j-spark_2.13 + latest_version_num + +``` sbt ```sbt @@ -47,7 +60,6 @@ libraryDependencies ++= Seq( For the latest release version number, please check [here](https://github.com/dmlc/xgboost/releases). -To enable the GPU algorithm (`tree_method='gpu_hist'`), use artifacts `xgboost4j-gpu_2.12` and `xgboost4j-spark-gpu_2.12` instead. ### Access SNAPSHOT version @@ -85,6 +97,19 @@ Then add XGBoost4J as a dependency: latest_version_num-SNAPSHOT ``` +or with scala 2.13 +``` + + ml.dmlc + xgboost4j_2.13 + latest_version_num-SNAPSHOT + + + ml.dmlc + xgboost4j-spark_2.13 + latest_version_num-SNAPSHOT + +``` sbt ```sbt @@ -96,7 +121,9 @@ libraryDependencies ++= Seq( For the latest release version number, please check [the repository listing](https://s3-us-west-2.amazonaws.com/xgboost-maven-repo/list.html). +### GPU algorithm To enable the GPU algorithm (`tree_method='gpu_hist'`), use artifacts `xgboost4j-gpu_2.12` and `xgboost4j-spark-gpu_2.12` instead. +Note that scala 2.13 is not supported by the [NVIDIA/spark-rapids#1525](https://github.com/NVIDIA/spark-rapids/issues/1525) yet, so the GPU algorithm can only be used with scala 2.12. ## Examples diff --git a/jvm-packages/pom.xml b/jvm-packages/pom.xml index 4903b8f38153..3a752342b68f 100644 --- a/jvm-packages/pom.xml +++ b/jvm-packages/pom.xml @@ -5,7 +5,7 @@ 4.0.0 ml.dmlc - xgboost-jvm_2.12 + xgboost-jvm 2.0.0-SNAPSHOT pom XGBoost JVM Package @@ -34,6 +34,7 @@ 1.8 1.8 1.17.0 + 4.13.2 3.4.0 2.12.17 2.12 @@ -44,7 +45,9 @@ 23.04.0 23.04.0 cuda11 - + 3.2.15 + 2.9.0 + central_maven @@ -70,6 +73,14 @@ + + scala-2.13 + + 2.13 + 2.13.10 + + + gpu @@ -466,6 +477,7 @@ + com.esotericsoftware kryo @@ -482,6 +494,11 @@ scala-library ${scala.version} + + org.scala-lang.modules + scala-collection-compat_${scala.binary.version} + ${scala-collection-compat.version} + commons-logging commons-logging @@ -490,13 +507,13 @@ org.scalatest scalatest_${scala.binary.version} - 3.2.15 + ${scalatest.version} test org.scalactic scalactic_${scala.binary.version} - 3.2.15 + ${scalatest.version} test diff --git a/jvm-packages/xgboost4j-example/pom.xml b/jvm-packages/xgboost4j-example/pom.xml index 40c9c72a446b..e6ed8a6001b0 100644 --- a/jvm-packages/xgboost4j-example/pom.xml +++ b/jvm-packages/xgboost4j-example/pom.xml @@ -5,10 +5,11 @@ 4.0.0 ml.dmlc - xgboost-jvm_2.12 + xgboost-jvm 2.0.0-SNAPSHOT - xgboost4j-example_2.12 + xgboost4j-example + xgboost4j-example_${scala.binary.version} 2.0.0-SNAPSHOT jar diff --git a/jvm-packages/xgboost4j-example/src/main/scala/ml/dmlc/xgboost4j/scala/example/flink/DistTrainWithFlink.scala b/jvm-packages/xgboost4j-example/src/main/scala/ml/dmlc/xgboost4j/scala/example/flink/DistTrainWithFlink.scala index cb859f62d328..3bfefb841ded 100644 --- a/jvm-packages/xgboost4j-example/src/main/scala/ml/dmlc/xgboost4j/scala/example/flink/DistTrainWithFlink.scala +++ b/jvm-packages/xgboost4j-example/src/main/scala/ml/dmlc/xgboost4j/scala/example/flink/DistTrainWithFlink.scala @@ -73,12 +73,13 @@ object DistTrainWithFlink { .map(_.f1.f0) .returns(testDataTypeHint) - val paramMap = mapAsJavaMap(Map( - ("eta", "0.1".asInstanceOf[AnyRef]), - ("max_depth", "2"), - ("objective", "binary:logistic"), - ("verbosity", "1") - )) + val paramMap = Map( + ("eta", "0.1".asInstanceOf[AnyRef]), + ("max_depth", "2"), + ("objective", "binary:logistic"), + ("verbosity", "1") + ) + .asJava // number of iterations val round = 2 diff --git a/jvm-packages/xgboost4j-flink/pom.xml b/jvm-packages/xgboost4j-flink/pom.xml index a9a80e29a033..8d51a9dcf9ce 100644 --- a/jvm-packages/xgboost4j-flink/pom.xml +++ b/jvm-packages/xgboost4j-flink/pom.xml @@ -5,9 +5,11 @@ 4.0.0 ml.dmlc - xgboost-jvm_2.12 + xgboost-jvm 2.0.0-SNAPSHOT + + xgboost4j-flink xgboost4j-flink_${scala.binary.version} 2.0.0-SNAPSHOT diff --git a/jvm-packages/xgboost4j-gpu/pom.xml b/jvm-packages/xgboost4j-gpu/pom.xml index 1d7a06708208..f34680302e4e 100644 --- a/jvm-packages/xgboost4j-gpu/pom.xml +++ b/jvm-packages/xgboost4j-gpu/pom.xml @@ -5,10 +5,11 @@ 4.0.0 ml.dmlc - xgboost-jvm_2.12 + xgboost-jvm 2.0.0-SNAPSHOT - xgboost4j-gpu_2.12 + xgboost4j-gpu_${scala.binary.version} + xgboost4j-gpu 2.0.0-SNAPSHOT jar @@ -35,13 +36,13 @@ junit junit - 4.13.2 + ${junit.version} test org.scalatest scalatest_${scala.binary.version} - 3.2.15 + ${scalatest.version} provided diff --git a/jvm-packages/xgboost4j-spark-gpu/pom.xml b/jvm-packages/xgboost4j-spark-gpu/pom.xml index bcb7edb2ad72..a19a71bb50cd 100644 --- a/jvm-packages/xgboost4j-spark-gpu/pom.xml +++ b/jvm-packages/xgboost4j-spark-gpu/pom.xml @@ -5,10 +5,11 @@ 4.0.0 ml.dmlc - xgboost-jvm_2.12 + xgboost-jvm 2.0.0-SNAPSHOT - xgboost4j-spark-gpu_2.12 + xgboost4j-spark-gpu + xgboost4j-spark-gpu_${scala.binary.version} @@ -24,7 +25,7 @@ ml.dmlc xgboost4j-gpu_${scala.binary.version} - 2.0.0-SNAPSHOT + ${project.version} org.apache.spark diff --git a/jvm-packages/xgboost4j-spark/pom.xml b/jvm-packages/xgboost4j-spark/pom.xml index 3a84233d17e1..d8f4cb9146c8 100644 --- a/jvm-packages/xgboost4j-spark/pom.xml +++ b/jvm-packages/xgboost4j-spark/pom.xml @@ -5,10 +5,11 @@ 4.0.0 ml.dmlc - xgboost-jvm_2.12 + xgboost-jvm 2.0.0-SNAPSHOT - xgboost4j-spark_2.12 + xgboost4j-spark + xgboost4j-spark_${scala.binary.version} @@ -24,7 +25,7 @@ ml.dmlc xgboost4j_${scala.binary.version} - 2.0.0-SNAPSHOT + ${project.version} org.apache.spark diff --git a/jvm-packages/xgboost4j-tester/generate_pom.py b/jvm-packages/xgboost4j-tester/generate_pom.py index 06372e9b261c..eccfe904e6b1 100644 --- a/jvm-packages/xgboost4j-tester/generate_pom.py +++ b/jvm-packages/xgboost4j-tester/generate_pom.py @@ -8,10 +8,10 @@ 4.0.0 ml.dmlc - xgboost4j-tester_2.12 + xgboost4j-tester_${{scala.binary.version}} 1.0-SNAPSHOT - xgboost4j-tester_2.12 + xgboost4j-tester_${{scala.binary.version}} UTF-8 diff --git a/jvm-packages/xgboost4j/pom.xml b/jvm-packages/xgboost4j/pom.xml index 3a1c4b2cf4f7..4352aab129b8 100644 --- a/jvm-packages/xgboost4j/pom.xml +++ b/jvm-packages/xgboost4j/pom.xml @@ -5,10 +5,11 @@ 4.0.0 ml.dmlc - xgboost-jvm_2.12 + xgboost-jvm 2.0.0-SNAPSHOT - xgboost4j_2.12 + xgboost4j + xgboost4j_${scala.binary.version} 2.0.0-SNAPSHOT jar @@ -28,13 +29,13 @@ junit junit - 4.13.2 + ${junit.version} test org.scalatest scalatest_${scala.binary.version} - 3.2.15 + ${scalatest.version} provided diff --git a/jvm-packages/xgboost4j/src/main/scala/ml/dmlc/xgboost4j/scala/EvalTrait.scala b/jvm-packages/xgboost4j/src/main/scala/ml/dmlc/xgboost4j/scala/EvalTrait.scala index 587ace352a8a..fe17804fda58 100644 --- a/jvm-packages/xgboost4j/src/main/scala/ml/dmlc/xgboost4j/scala/EvalTrait.scala +++ b/jvm-packages/xgboost4j/src/main/scala/ml/dmlc/xgboost4j/scala/EvalTrait.scala @@ -37,7 +37,7 @@ trait EvalTrait extends IEvaluation { */ def eval(predicts: Array[Array[Float]], dmat: DMatrix): Float - private[scala] def eval(predicts: Array[Array[Float]], jdmat: java.DMatrix): Float = { + def eval(predicts: Array[Array[Float]], jdmat: java.DMatrix): Float = { require(predicts.length == jdmat.getLabel.length, "predicts size and label size must match " + s" predicts size: ${predicts.length}, label size: ${jdmat.getLabel.length}") eval(predicts, new DMatrix(jdmat)) diff --git a/jvm-packages/xgboost4j/src/main/scala/ml/dmlc/xgboost4j/scala/ObjectiveTrait.scala b/jvm-packages/xgboost4j/src/main/scala/ml/dmlc/xgboost4j/scala/ObjectiveTrait.scala index 24e603762a74..de218f0c53dc 100644 --- a/jvm-packages/xgboost4j/src/main/scala/ml/dmlc/xgboost4j/scala/ObjectiveTrait.scala +++ b/jvm-packages/xgboost4j/src/main/scala/ml/dmlc/xgboost4j/scala/ObjectiveTrait.scala @@ -31,7 +31,7 @@ trait ObjectiveTrait extends IObjective { */ def getGradient(predicts: Array[Array[Float]], dtrain: DMatrix): List[Array[Float]] - private[scala] def getGradient(predicts: Array[Array[Float]], dtrain: JDMatrix): + def getGradient(predicts: Array[Array[Float]], dtrain: JDMatrix): java.util.List[Array[Float]] = { getGradient(predicts, new DMatrix(dtrain)).asJava } diff --git a/jvm-packages/xgboost4j/src/main/scala/ml/dmlc/xgboost4j/scala/XGBoost.scala b/jvm-packages/xgboost4j/src/main/scala/ml/dmlc/xgboost4j/scala/XGBoost.scala index 90d06c343205..8d6804148633 100644 --- a/jvm-packages/xgboost4j/src/main/scala/ml/dmlc/xgboost4j/scala/XGBoost.scala +++ b/jvm-packages/xgboost4j/src/main/scala/ml/dmlc/xgboost4j/scala/XGBoost.scala @@ -17,12 +17,11 @@ package ml.dmlc.xgboost4j.scala import java.io.InputStream +import ml.dmlc.xgboost4j.java.{XGBoostError, XGBoost => JXGBoost} -import ml.dmlc.xgboost4j.java.{XGBoostError, Booster => JBooster, XGBoost => JXGBoost} -import scala.collection.JavaConverters._ - +import scala.jdk.CollectionConverters._ import org.apache.hadoop.conf.Configuration -import org.apache.hadoop.fs.{FileSystem, Path} +import org.apache.hadoop.fs.Path /** * XGBoost Scala Training function. @@ -40,7 +39,12 @@ object XGBoost { earlyStoppingRound: Int = 0, prevBooster: Booster, checkpointParams: Option[ExternalCheckpointParams]): Booster = { - val jWatches = watches.mapValues(_.jDMatrix).asJava + + // we have to filter null value for customized obj and eval + val jParams: java.util.Map[String, AnyRef] = + params.filter(_._2 != null).mapValues(_.toString.asInstanceOf[AnyRef]).toMap.asJava + + val jWatches = watches.mapValues(_.jDMatrix).toMap.asJava val jBooster = if (prevBooster == null) { null } else { @@ -49,10 +53,10 @@ object XGBoost { val xgboostInJava = checkpointParams. map(cp => { + // we have to filter null value for customized obj and eval JXGBoost.trainAndSaveCheckpoint( dtrain.jDMatrix, - // we have to filter null value for customized obj and eval - params.filter(_._2 != null).mapValues(_.toString.asInstanceOf[AnyRef]).asJava, + jParams, numRounds, jWatches, metrics, obj, eval, earlyStoppingRound, jBooster, cp.checkpointInterval, cp.checkpointPath, @@ -61,8 +65,7 @@ object XGBoost { getOrElse( JXGBoost.train( dtrain.jDMatrix, - // we have to filter null value for customized obj and eval - params.filter(_._2 != null).mapValues(_.toString.asInstanceOf[AnyRef]).asJava, + jParams, numRounds, jWatches, metrics, obj, eval, earlyStoppingRound, jBooster) ) if (prevBooster == null) { diff --git a/tests/ci_build/build_jvm_packages.sh b/tests/ci_build/build_jvm_packages.sh index 241fc445f640..5797a1f61964 100755 --- a/tests/ci_build/build_jvm_packages.sh +++ b/tests/ci_build/build_jvm_packages.sh @@ -6,6 +6,7 @@ set -x spark_version=$1 use_cuda=$2 gpu_arch=$3 +use_scala213=$4 gpu_options="" if [ "x$use_cuda" == "x-Duse.cuda=ON" ]; then @@ -22,7 +23,13 @@ export RABIT_MOCK=ON if [ "x$gpu_arch" != "x" ]; then export GPU_ARCH_FLAG=$gpu_arch fi -mvn --no-transfer-progress package -Dspark.version=${spark_version} $gpu_options + +mvn_profile_string="" +if [ "x$use_scala213" != "x" ]; then + export mvn_profile_string="-Pdefault,scala-2.13" +fi + +mvn --no-transfer-progress package $mvn_profile_string -Dspark.version=${spark_version} $gpu_options set +x set +e