From 597d5a298d708145ced947c5596273be35374881 Mon Sep 17 00:00:00 2001
From: Jeremy Liu
Date: Mon, 13 Jul 2020 13:27:21 -0400
Subject: [PATCH] Add pre-installed conda configuration and use to find rlib directory (#700)

* Add pre-installed conda environment variable

* Update FORK.md

Co-authored-by: Jeremy Liu
---
 FORK.md                                            |  1 +
 .../scala/org/apache/spark/api/r/RRunner.scala     | 14 ++++++++++++--
 .../org/apache/spark/internal/config/package.scala |  5 +++++
 3 files changed, 18 insertions(+), 2 deletions(-)

diff --git a/FORK.md b/FORK.md
index 0e34d99e90ef1..151fa8cc96874 100644
--- a/FORK.md
+++ b/FORK.md
@@ -26,6 +26,7 @@
 * Gradle plugin to easily create custom docker images for use with k8s
 * Filter rLibDir by exists so that daemon.R references the correct file [460](https://github.com/palantir/spark/pull/460)
 * Implementation of the shuffle I/O plugins from SPARK-25299 that asynchronously backs up shuffle files to remote storage
+* Add pre-installed conda configuration and use to find rlib directory [700](https://github.com/palantir/spark/pull/700)
 
 # Reverted
 * [SPARK-25908](https://issues.apache.org/jira/browse/SPARK-25908) - Removal of `monotonicall_increasing_id`, `toDegree`, `toRadians`, `approxCountDistinct`, `unionAll`

diff --git a/core/src/main/scala/org/apache/spark/api/r/RRunner.scala b/core/src/main/scala/org/apache/spark/api/r/RRunner.scala
index 93ed77d7b7f86..e8fa16a830aa8 100644
--- a/core/src/main/scala/org/apache/spark/api/r/RRunner.scala
+++ b/core/src/main/scala/org/apache/spark/api/r/RRunner.scala
@@ -31,6 +31,7 @@ import org.apache.spark.broadcast.Broadcast
 import org.apache.spark.deploy.Common.Provenance
 import org.apache.spark.internal.Logging
 import org.apache.spark.internal.config.BUFFER_SIZE
+import org.apache.spark.internal.config.CONDA_PRE_INSTALLED_PATH
 import org.apache.spark.internal.config.R._
 import org.apache.spark.util.Utils
 
@@ -363,6 +364,7 @@
     val sparkConf = SparkEnv.get.conf
     val requestedRCommand = Provenance.fromConfOpt(sparkConf, R_COMMAND)
       .getOrElse(Provenance.fromConf(sparkConf, SPARKR_COMMAND))
+    val preInstalledCondaPath = Provenance.fromConfOpt(sparkConf, CONDA_PRE_INSTALLED_PATH)
     val condaEnv = condaSetupInstructions.map(CondaEnvironmentManager.getOrCreateCondaEnvironment)
     val rCommand = condaEnv.map { conda =>
       if (requestedRCommand.value != SPARKR_COMMAND.defaultValue.get) {
@@ -375,9 +377,17 @@
 
     val rConnectionTimeout = sparkConf.get(R_BACKEND_CONNECTION_TIMEOUT)
     val rOptions = "--vanilla"
+    val rLibPath = "/lib/R/library"
     val rLibDir = condaEnv.map(conda =>
-      RUtils.sparkRPackagePath(isDriver = false) :+ (conda.condaEnvDir + "/lib/R/library"))
-      .getOrElse(RUtils.sparkRPackagePath(isDriver = false))
+      RUtils.sparkRPackagePath(isDriver = false) :+ (conda.condaEnvDir + rLibPath))
+      .getOrElse({
+        val sparkRPackagePaths = RUtils.sparkRPackagePath(isDriver = false)
+        if (preInstalledCondaPath.isDefined) {
+          sparkRPackagePaths :+ (preInstalledCondaPath.get + rLibPath)
+        } else {
+          sparkRPackagePaths
+        }
+      })
       .filter(dir => new File(dir).exists)
     if (rLibDir.isEmpty) {
       throw new SparkException("SparkR package is not installed on executor.")

diff --git a/core/src/main/scala/org/apache/spark/internal/config/package.scala b/core/src/main/scala/org/apache/spark/internal/config/package.scala
index 47d571cd1ebd7..1b5a94cf8fde5 100644
--- a/core/src/main/scala/org/apache/spark/internal/config/package.scala
+++ b/core/src/main/scala/org/apache/spark/internal/config/package.scala
@@ -543,6 +543,11 @@ package object config {
     .stringConf
     .createOptional
 
+  private[spark] val CONDA_PRE_INSTALLED_PATH = ConfigBuilder("spark.conda.preInstalledPath")
+    .doc("The path to pre-installed conda directory.")
+    .stringConf
+    .createOptional
+
   private[spark] val CONDA_VERBOSITY = ConfigBuilder("spark.conda.verbosity")
     .doc("How many times to apply -v to conda. A number between 0 and 3, with 0 being default.")
     .intConf
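
For context, here is a minimal, self-contained sketch of the path-resolution rule the RRunner.scala hunks above introduce. The object `RLibDirSketch`, the helper `resolveRLibDirs`, and the sample paths (`/opt/spark/R/lib`, `/opt/conda`) are hypothetical, for illustration only; the real implementation goes through `RUtils.sparkRPackagePath`, `Provenance`-wrapped config values, and `CondaEnvironmentManager`.

import java.io.File

// Hypothetical sketch (not part of the patch): mirrors the rLibDir logic that
// RRunner builds from the SparkR package paths, the conda env, and the new
// spark.conda.preInstalledPath setting.
object RLibDirSketch {
  private val rLibPath = "/lib/R/library"

  def resolveRLibDirs(
      sparkRPackagePaths: Seq[String],        // what RUtils.sparkRPackagePath would return
      condaEnvDir: Option[String],            // a job-specific conda env, if one was created
      preInstalledCondaPath: Option[String]   // spark.conda.preInstalledPath, if set
  ): Seq[String] = {
    val candidates = condaEnvDir match {
      // A job-specific conda env takes precedence; the pre-installed path is
      // only consulted when no env was created for the job.
      case Some(envDir) => sparkRPackagePaths :+ (envDir + rLibPath)
      case None => preInstalledCondaPath match {
        case Some(preInstalled) => sparkRPackagePaths :+ (preInstalled + rLibPath)
        case None => sparkRPackagePaths
      }
    }
    // As in the patch, drop candidates that do not exist on the executor, so
    // daemon.R is only ever looked up in a real directory.
    candidates.filter(dir => new File(dir).exists)
  }

  def main(args: Array[String]): Unit = {
    // Example: no per-job conda env, but a pre-installed conda prefix is configured.
    println(resolveRLibDirs(
      sparkRPackagePaths = Seq("/opt/spark/R/lib"),
      condaEnvDir = None,
      preInstalledCondaPath = Some("/opt/conda")))
  }
}

In a deployment that bakes SparkR into a conda prefix (say `/opt/conda`, a hypothetical path), this would be enabled with `spark-submit --conf spark.conda.preInstalledPath=/opt/conda ...`, after which executors also search `/opt/conda/lib/R/library` when locating the SparkR package.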