From ce4e7b75f64442e3166842adda7e8b30623843aa Mon Sep 17 00:00:00 2001 From: Peter Toth Date: Tue, 10 Jan 2023 18:05:52 +0900 Subject: [PATCH] [SPARK-41958][CORE] Disallow arbitrary custom classpath with proxy user in cluster mode This PR proposes to disallow an arbitrary custom classpath with a proxy user in cluster mode by default. The change is needed to avoid an arbitrary classpath being used in a Spark cluster. This is a user-facing change: users who need this feature must re-enable it by setting `spark.submit.proxyUser.allowCustomClasspathInClusterMode` to `true`. The change was manually tested. Closes #39474 from Ngone51/dev. Lead-authored-by: Peter Toth Co-authored-by: Yi Wu Signed-off-by: Hyukjin Kwon --- .../org/apache/spark/deploy/SparkSubmit.scala | 16 ++++++++++++++++ .../apache/spark/internal/config/package.scala | 7 +++++++ 2 files changed, 23 insertions(+) diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala index a785652ee5340..22afc50e63151 100644 --- a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala +++ b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala @@ -303,6 +303,10 @@ private[spark] class SparkSubmit extends Logging { val isKubernetesClient = clusterManager == KUBERNETES && deployMode == CLIENT val isKubernetesClusterModeDriver = isKubernetesClient && sparkConf.getBoolean("spark.kubernetes.submitInDriver", false) + val isCustomClasspathInClusterModeDisallowed = + !sparkConf.get(ALLOW_CUSTOM_CLASSPATH_BY_PROXY_USER_IN_CLUSTER_MODE) && + args.proxyUser != null && + (isYarnCluster || isMesosCluster || isStandAloneCluster || isKubernetesCluster) if (!isMesosCluster && !isStandAloneCluster) { // Resolve maven dependencies if there are any and add classpath to jars. 
Add them to py-files @@ -860,6 +864,14 @@ private[spark] class SparkSubmit extends Logging { if (args.verbose) { childArgs ++= Seq("--verbose") } + + if (childClasspath.nonEmpty && isCustomClasspathInClusterModeDisallowed) { + logWarning(s"Ignore classpath ${childClasspath.mkString(", ")} with proxy user specified " + + s"in Cluster mode when ${ALLOW_CUSTOM_CLASSPATH_BY_PROXY_USER_IN_CLUSTER_MODE.key} is " + + s"disabled") + childClasspath.clear() // clear only after logging, so the warning lists what was dropped + } + (childArgs.toSeq, childClasspath.toSeq, sparkConf, childMainClass) } @@ -913,6 +925,10 @@ private[spark] class SparkSubmit extends Logging { logInfo(s"Classpath elements:\n${childClasspath.mkString("\n")}") logInfo("\n") } + assert(!(args.deployMode == "cluster" && args.proxyUser != null && childClasspath.nonEmpty) || + sparkConf.get(ALLOW_CUSTOM_CLASSPATH_BY_PROXY_USER_IN_CLUSTER_MODE), + s"Classpath of spark-submit should not change in cluster mode if proxy user is specified " + + s"when ${ALLOW_CUSTOM_CLASSPATH_BY_PROXY_USER_IN_CLUSTER_MODE.key} is disabled") val loader = getSubmitClassLoader(sparkConf) for (jar <- childClasspath) { addJarToClasspath(jar, loader) diff --git a/core/src/main/scala/org/apache/spark/internal/config/package.scala b/core/src/main/scala/org/apache/spark/internal/config/package.scala index 5570ab6d90446..10a6eaff537e8 100644 --- a/core/src/main/scala/org/apache/spark/internal/config/package.scala +++ b/core/src/main/scala/org/apache/spark/internal/config/package.scala @@ -2256,4 +2256,11 @@ package object config { .version("3.2.0") .stringConf .createOptional + + private[spark] val ALLOW_CUSTOM_CLASSPATH_BY_PROXY_USER_IN_CLUSTER_MODE = + ConfigBuilder("spark.submit.proxyUser.allowCustomClasspathInClusterMode") + .internal() + .version("3.4.0") + .booleanConf + .createWithDefault(false) }