This repository has been archived by the owner on Jan 9, 2020. It is now read-only.

Secure HDFS Support #414

Merged
Commits (43 total; changes shown from 42 commits)
ea9e516
Initial architecture design for HDFS support
ifilonenko Jul 15, 2017
47ea307
Minor styling
ifilonenko Jul 15, 2017
60a19ca
Added proper logic for mounting ConfigMaps
ifilonenko Jul 18, 2017
1d0175a
styling
ifilonenko Jul 18, 2017
163193a
modified otherKubernetesResource logic
ifilonenko Jul 18, 2017
8381fa6
fixed Integration tests and modified HADOOP_CONF_DIR variable to be F…
ifilonenko Jul 18, 2017
d4b1a68
setting HADOOP_CONF_DIR env variables
ifilonenko Jul 18, 2017
0bba092
Included integration tests for Stage 1
ifilonenko Jul 18, 2017
06df962
Initial Kerberos support
ifilonenko Jul 19, 2017
d7f54dd
initial Stage 2 architecture using deprecated 2.1 methods
ifilonenko Jul 21, 2017
d3c5a03
Added current, BROKEN, integration test environment for review
ifilonenko Jul 26, 2017
d7441ba
working hadoop cluster
ifilonenko Jul 28, 2017
04eed68
Using locks and monitors to ensure proper configs for setting up kerb…
ifilonenko Jul 29, 2017
62354eb
working Stage 2
ifilonenko Jul 31, 2017
514ac19
documentation
ifilonenko Aug 1, 2017
3fbf88c
Integration Stages 1,2 and 3
ifilonenko Aug 2, 2017
b321436
further testing work
ifilonenko Aug 2, 2017
b6912d2
fixing imports
ifilonenko Aug 2, 2017
c6b11f8
Stage 3 Integration tests pass
ifilonenko Aug 3, 2017
1e71ca7
uncommented SparkDockerBuilder
ifilonenko Aug 4, 2017
350c8ed
testing fix
ifilonenko Aug 4, 2017
5e4051c
handled comments and increased test hardening
ifilonenko Aug 8, 2017
8338fdb
Solve failing integration test problem and lower TIMEOUT time
ifilonenko Aug 9, 2017
d6d0945
modify security.authoization
ifilonenko Aug 9, 2017
e3f14e1
Modifying HADOOP_CONF flags
ifilonenko Aug 9, 2017
61a7414
Refactored tests and included modifications to pass all tests regardl…
ifilonenko Aug 15, 2017
7a0b4e4
Adding unit test and one more integration test
ifilonenko Aug 16, 2017
8dacb19
completed unit tests w/o UGI mocking
ifilonenko Aug 16, 2017
d9b7b50
cleanup and various small fixes
ifilonenko Aug 18, 2017
d53a50f
added back sparkdockerbuilder images
ifilonenko Aug 22, 2017
499b037
merge issues
ifilonenko Aug 31, 2017
ffe7891
address initial comments and scalastyle issues
ifilonenko Aug 31, 2017
6efa379
addresses comments from PR
ifilonenko Aug 31, 2017
6052a13
mocking hadoopUGI
ifilonenko Aug 31, 2017
f9ca47d
Fix executor env to include simple authn
kimoonkim Sep 1, 2017
91e364c
Merge remote-tracking branch 'bloomberg/secure-hdfs-support4' into pr…
kimoonkim Sep 1, 2017
4fe86f0
Merge pull request #1 from kimoonkim/pr-414
ifilonenko Sep 1, 2017
d2c8649
Fix a bug in executor env handling
kimoonkim Sep 1, 2017
4780878
Merge remote-tracking branch 'bloomberg/secure-hdfs-support4' into pr…
kimoonkim Sep 1, 2017
17f2702
Merge pull request #2 from kimoonkim/pr-414
ifilonenko Sep 1, 2017
b566fa9
Fix a bug in how the driver sets simple authn
kimoonkim Sep 1, 2017
726ff64
Merge pull request #3 from kimoonkim/pr-414
ifilonenko Sep 2, 2017
2d48613
handling Pr comments
ifilonenko Sep 19, 2017
@@ -575,7 +575,7 @@ object SparkSubmit extends CommandLineUtils {
}

// assure a keytab is available from any place in a JVM
if (clusterManager == YARN || clusterManager == LOCAL) {
if (clusterManager == YARN || clusterManager == KUBERNETES || clusterManager == LOCAL) {
if (args.principal != null) {
require(args.keytab != null, "Keytab must be specified when principal is specified")
if (!new File(args.keytab).exists()) {
44 changes: 44 additions & 0 deletions docs/running-on-kubernetes.md
@@ -783,6 +783,50 @@ from the other deployment modes. See the [configuration page](configuration.html
</td>
</tr>
<tr>
<td><code>spark.kubernetes.kerberos.enabled</code></td>
<td>false</td>
<td>
Specify whether your job requires Kerberos authentication to access HDFS. By default, we
will assume that you will not require secure HDFS access.
</td>
</tr>
<tr>
<td><code>spark.kubernetes.kerberos.keytab</code></td>
<td>(none)</td>
<td>
Assuming you have set <code>spark.kubernetes.kerberos.enabled</code> to true, this lets you specify
the location of your Kerberos keytab to be used in order to access Secure HDFS. This is optional, as you
may log in by running <code>kinit</code> before running the spark-submit, and the submission client
will look within your local TGT cache to resolve this.
</td>
</tr>
<tr>
<td><code>spark.kubernetes.kerberos.principal</code></td>
<td>(none)</td>
<td>
Assuming you have set <code>spark.kubernetes.kerberos.enabled</code> to true, this lets you specify
the Kerberos principal that you wish to use to access Secure HDFS. This is optional, as you
may log in by running <code>kinit</code> before running the spark-submit, and the submission client
will look within your local TGT cache to resolve this.
</td>
</tr>
<tr>
<td><code>spark.kubernetes.kerberos.tokensecret.name</code></td>
<td>(none)</td>
<td>
Assuming you have set <code>spark.kubernetes.kerberos.enabled</code> to true, this lets you specify
the name of the secret where your existing delegation token data is stored. You must also specify the
item key <code>spark.kubernetes.kerberos.tokensecret.itemkey</code> where your data is stored on the secret.

Review comment (Member): Can you mention that this is optional in case you want to use a pre-existing secret, and that a new secret will be automatically created otherwise?

</td>
</tr>
<tr>
<td><code>spark.kubernetes.kerberos.tokensecret.itemkey</code></td>
<td>spark.kubernetes.kerberos.dt.label</td>
<td>
Assuming you have set <code>spark.kubernetes.kerberos.enabled</code> to true, this lets you specify
the label within the pre-specified secret where your existing delegation token data is stored.

Review comment (Member): s/label/data item key name/

We have a default value of <code>spark.kubernetes.kerberos.tokensecret.itemkey</code> should you not include it, but
you should always include this if you are providing a pre-existing secret containing the delegation token data.
</td>
</tr>
<tr>
<td><code>spark.executorEnv.[EnvironmentVariableName]</code></td>
<td>(none)</td>
<td>
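For reference, a minimal sketch (not part of this diff) of supplying these documented settings programmatically through SparkConf; the keytab path, principal, and secret names below are placeholders:

// Hypothetical example; values are placeholders, not defaults.
val conf = new org.apache.spark.SparkConf()
  .set("spark.kubernetes.kerberos.enabled", "true")
  .set("spark.kubernetes.kerberos.keytab", "/path/to/user.keytab")
  .set("spark.kubernetes.kerberos.principal", "user@EXAMPLE.COM")
// Or, to reuse an existing delegation-token secret instead of a keytab:
//   .set("spark.kubernetes.kerberos.tokensecret.name", "existing-dt-secret")
//   .set("spark.kubernetes.kerberos.tokensecret.itemkey", "hadoop-token-key")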
6 changes: 6 additions & 0 deletions resource-managers/kubernetes/core/pom.xml
@@ -100,6 +100,12 @@
<dependency>
<groupId>com.fasterxml.jackson.jaxrs</groupId>
<artifactId>jackson-jaxrs-json-provider</artifactId>
<exclusions>
Review comment (Member): Hmm. Do we still need this?

<exclusion>
<groupId>javax.ws.rs</groupId>
<artifactId>jsr311-api</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>javax.ws.rs</groupId>
@@ -0,0 +1,86 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.spark.deploy.kubernetes

import java.io.File

import scala.collection.JavaConverters._

import io.fabric8.kubernetes.api.model.{ContainerBuilder, KeyToPathBuilder, PodBuilder}

import org.apache.spark.deploy.kubernetes.constants._
import org.apache.spark.internal.Logging

/**
* This is separated out from the HadoopConf steps API because this component can be reused to
* set up the Hadoop Configuration for executors as well.
*/
private[spark] trait HadoopConfBootstrap {
/**
* Bootstraps a main container with the ConfigMaps containing Hadoop config files
* mounted as volumes and an ENV variable pointing to the mounted file.
*/
def bootstrapMainContainerAndVolumes(
originalPodWithMainContainer: PodWithMainContainer)
: PodWithMainContainer
}

private[spark] class HadoopConfBootstrapImpl(
hadoopConfConfigMapName: String,
hadoopConfigFiles: Seq[File],
hadoopUGI: HadoopUGIUtil) extends HadoopConfBootstrap with Logging {

override def bootstrapMainContainerAndVolumes(
originalPodWithMainContainer: PodWithMainContainer)
: PodWithMainContainer = {
logInfo("HADOOP_CONF_DIR defined. Mounting HDFS specific .xml files")
val keyPaths = hadoopConfigFiles.map(file =>
new KeyToPathBuilder()
.withKey(file.toPath.getFileName.toString)
.withPath(file.toPath.getFileName.toString)
Review comment (Member): Key and path are exactly the same. Is this intended? If yes, why not use a set?

Reply (Member Author): It is intended, to make my life easier when looking at the ConfigMap and seeing the data be: core-site.xml

.build()).toList
val hadoopSupportedPod = new PodBuilder(originalPodWithMainContainer.pod)
.editSpec()
.addNewVolume()
.withName(HADOOP_FILE_VOLUME)
.withNewConfigMap()
.withName(hadoopConfConfigMapName)
.withItems(keyPaths.asJava)
.endConfigMap()
.endVolume()
.endSpec()
.build()
val mainContainerWithMountedHadoopConf = new ContainerBuilder(
originalPodWithMainContainer.mainContainer)
.addNewVolumeMount()
.withName(HADOOP_FILE_VOLUME)
.withMountPath(HADOOP_CONF_DIR_PATH)
.endVolumeMount()
.addNewEnv()
.withName(ENV_HADOOP_CONF_DIR)
.withValue(HADOOP_CONF_DIR_PATH)
.endEnv()
.addNewEnv()
.withName(ENV_SPARK_USER)
.withValue(hadoopUGI.getShortName)
.endEnv()
.build()
originalPodWithMainContainer.copy(
pod = hadoopSupportedPod,
mainContainer = mainContainerWithMountedHadoopConf)
}
}
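Below is a minimal usage sketch (not part of this diff) of how a submission step might apply this bootstrap; driverPod, driverContainer, and the ConfigMap name are hypothetical placeholders:

// Hypothetical wiring, for illustration only.
import java.io.File

val bootstrap = new HadoopConfBootstrapImpl(
  hadoopConfConfigMapName = "spark-hadoop-conf",  // placeholder name
  hadoopConfigFiles = Seq(
    new File("/etc/hadoop/conf/core-site.xml"),
    new File("/etc/hadoop/conf/hdfs-site.xml")),
  hadoopUGI = new HadoopUGIUtil)
val bootstrapped = bootstrap.bootstrapMainContainerAndVolumes(
  PodWithMainContainer(driverPod, driverContainer))
// bootstrapped.pod now mounts the ConfigMap as a volume, and
// bootstrapped.mainContainer gains HADOOP_CONF_DIR and SPARK_USER env vars.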
@@ -0,0 +1,27 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.spark.deploy.kubernetes

import org.apache.hadoop.security.UserGroupInformation

private[spark] class HadoopUGIUtil {
def getCurrentUser: UserGroupInformation = UserGroupInformation.getCurrentUser
def getShortName: String = getCurrentUser.getShortUserName
def isSecurityEnabled: Boolean = UserGroupInformation.isSecurityEnabled
def loginUserFromKeytabAndReturnUGI(principal: String, keytab: String): UserGroupInformation =
UserGroupInformation.loginUserFromKeytabAndReturnUGI(principal, keytab)
}
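This wrapper exists so that UserGroupInformation's static calls can be stubbed in unit tests (see the "mocking hadoopUGI" commit above). A minimal test sketch, assuming Mockito:

// Hypothetical test snippet, for illustration only.
import org.mockito.Mockito.{mock, when}

val ugiUtil = mock(classOf[HadoopUGIUtil])
when(ugiUtil.getShortName).thenReturn("sparkuser")
when(ugiUtil.isSecurityEnabled).thenReturn(true)
// Pass ugiUtil to HadoopConfBootstrapImpl in place of the real wrapper.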
@@ -0,0 +1,76 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.spark.deploy.kubernetes

import io.fabric8.kubernetes.api.model.{ContainerBuilder, PodBuilder}

import org.apache.spark.deploy.kubernetes.constants._
import org.apache.spark.internal.Logging


/**
* This is separated out from the HadoopConf steps API because this component can be reused to
* mount the DT secret for executors as well.
*/
private[spark] trait KerberosTokenBootstrapConf {
Review comment (Member): Make the trait name match the file name, KerberosTokenConfBootstrap?

// Bootstraps a main container with the Secret mounted as volumes and an ENV variable
// pointing to the mounted file containing the DT for Secure HDFS interaction
def bootstrapMainContainerAndVolumes(
originalPodWithMainContainer: PodWithMainContainer)
: PodWithMainContainer
}

private[spark] class KerberosTokenConfBootstrapImpl(
secretName: String,
secretItemKey: String,
userName: String) extends KerberosTokenBootstrapConf with Logging {

override def bootstrapMainContainerAndVolumes(
originalPodWithMainContainer: PodWithMainContainer)
: PodWithMainContainer = {
logInfo("Mounting HDFS DT from Secret for Secure HDFS")
val dtMountedPod = new PodBuilder(originalPodWithMainContainer.pod)
.editOrNewSpec()
.addNewVolume()
.withName(SPARK_APP_HADOOP_SECRET_VOLUME_NAME)
.withNewSecret()
.withSecretName(secretName)
.endSecret()
.endVolume()
.endSpec()
.build()
val mainContainerWithMountedKerberos = new ContainerBuilder(
originalPodWithMainContainer.mainContainer)
.addNewVolumeMount()
.withName(SPARK_APP_HADOOP_SECRET_VOLUME_NAME)
.withMountPath(SPARK_APP_HADOOP_CREDENTIALS_BASE_DIR)
.endVolumeMount()
.addNewEnv()
.withName(ENV_HADOOP_TOKEN_FILE_LOCATION)
.withValue(s"$SPARK_APP_HADOOP_CREDENTIALS_BASE_DIR/$secretItemKey")
.endEnv()
.addNewEnv()
.withName(ENV_SPARK_USER)
.withValue(userName)
.endEnv()
.build()
originalPodWithMainContainer.copy(
pod = dtMountedPod,
mainContainer = mainContainerWithMountedKerberos)
}
}
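A minimal usage sketch (not part of this diff); the secret name, item key, and pod/container values are hypothetical placeholders:

// Hypothetical wiring, for illustration only.
val dtBootstrap = new KerberosTokenConfBootstrapImpl(
  secretName = "existing-dt-secret",
  secretItemKey = "hadoop-token-key",
  userName = "sparkuser")
val withToken = dtBootstrap.bootstrapMainContainerAndVolumes(
  PodWithMainContainer(executorPod, executorContainer))
// The container now sets HADOOP_TOKEN_FILE_LOCATION to the mounted token
// file, which Hadoop's UserGroupInformation picks up automatically at login.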
@@ -0,0 +1,28 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.spark.deploy.kubernetes

import io.fabric8.kubernetes.api.model.{Container, Pod}

/**
* The purpose of this case class is to package together the driver pod
* with its main container, so that we can bootstrap and modify them as a
* unit instead of modifying each component separately.
*/
private[spark] case class PodWithMainContainer(
pod: Pod,
mainContainer: Container)
@@ -542,6 +542,42 @@ package object config extends Logging {

private[spark] val KUBERNETES_NODE_SELECTOR_PREFIX = "spark.kubernetes.node.selector."

private[spark] val KUBERNETES_KERBEROS_SUPPORT =
ConfigBuilder("spark.kubernetes.kerberos.enabled")
.doc("Specify whether your job is a job that will require a Delegation Token to access HDFS")
.booleanConf
.createWithDefault(false)

private[spark] val KUBERNETES_KERBEROS_KEYTAB =
ConfigBuilder("spark.kubernetes.kerberos.keytab")
.doc("Specify the location of keytab" +
" for Kerberos in order to access Secure HDFS")
.stringConf
.createOptional

private[spark] val KUBERNETES_KERBEROS_PRINCIPAL =
ConfigBuilder("spark.kubernetes.kerberos.principal")
.doc("Specify the principal" +
" for Kerberos in order to access Secure HDFS")
.stringConf
.createOptional

private[spark] val KUBERNETES_KERBEROS_DT_SECRET_NAME =
ConfigBuilder("spark.kubernetes.kerberos.tokensecret.name")
.doc("Specify the name of the secret where " +
" your existing delegation token is stored. This removes the need" +
" for the job user to provide any keytab for launching a job")
.stringConf
.createOptional

private[spark] val KUBERNETES_KERBEROS_DT_SECRET_ITEM_KEY =
ConfigBuilder("spark.kubernetes.kerberos.tokensecret.itemkey")
.doc("Specify the item key of the data where " +
" your existing delegation token is stored. This removes the need" +
" for the job user to provide any keytab for launching a job")
.stringConf
.createOptional
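A sketch (hypothetical call site, not part of this diff) of how the submission client might consume these entries:

// Hypothetical snippet, for illustration only.
val sparkConf = new org.apache.spark.SparkConf()
val kerberosEnabled: Boolean = sparkConf.get(KUBERNETES_KERBEROS_SUPPORT)
val maybeKeytab: Option[String] = sparkConf.get(KUBERNETES_KERBEROS_KEYTAB)
val maybePrincipal: Option[String] = sparkConf.get(KUBERNETES_KERBEROS_PRINCIPAL)
// If neither a keytab/principal pair nor a token secret is supplied, the
// client falls back to the local TGT cache populated by kinit.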

private[spark] def resolveK8sMaster(rawMasterString: String): String = {
if (!rawMasterString.startsWith("k8s://")) {
throw new IllegalArgumentException("Master URL should start with k8s:// in Kubernetes mode.")
@@ -45,9 +45,6 @@ package object constants {

// Hadoop credentials secrets for the Spark app.
private[spark] val SPARK_APP_HADOOP_CREDENTIALS_BASE_DIR = "/mnt/secrets/hadoop-credentials"
private[spark] val SPARK_APP_HADOOP_TOKEN_FILE_SECRET_NAME = "hadoop-token-file"
private[spark] val SPARK_APP_HADOOP_TOKEN_FILE_PATH =
s"$SPARK_APP_HADOOP_CREDENTIALS_BASE_DIR/$SPARK_APP_HADOOP_TOKEN_FILE_SECRET_NAME"
private[spark] val SPARK_APP_HADOOP_SECRET_VOLUME_NAME = "hadoop-secret"

// Default and fixed ports
@@ -79,6 +76,7 @@ package object constants {
private[spark] val ENV_JAVA_OPT_PREFIX = "SPARK_JAVA_OPT_"
private[spark] val ENV_MOUNTED_FILES_FROM_SECRET_DIR = "SPARK_MOUNTED_FILES_FROM_SECRET_DIR"
private[spark] val ENV_HADOOP_TOKEN_FILE_LOCATION = "HADOOP_TOKEN_FILE_LOCATION"
private[spark] val ENV_SPARK_USER = "SPARK_USER"

// Bootstrapping dependencies with the init-container
private[spark] val INIT_CONTAINER_ANNOTATION = "pod.beta.kubernetes.io/init-containers"
@@ -101,6 +99,26 @@ package object constants {
private[spark] val DEFAULT_SHUFFLE_MOUNT_NAME = "shuffle"
private[spark] val INIT_CONTAINER_SECRET_VOLUME_NAME = "spark-init-secret"

// Hadoop Configuration
private[spark] val HADOOP_FILE_VOLUME = "hadoop-properties"
private[spark] val HADOOP_CONF_DIR_PATH = "/etc/hadoop/conf"
private[spark] val ENV_HADOOP_CONF_DIR = "HADOOP_CONF_DIR"
private[spark] val HADOOP_CONF_DIR_LOC = "spark.kubernetes.hadoop.conf.dir"
Review comment (Member): Are these spark.kubernetes.xxx Spark config keys? I see some of them are used to pass info from the client to driver/executors. I wonder if there is a better way to pass info to them. I'm worried about a potential conflict if future public config keys pick the same names. Maybe we want to move these to config.scala using ConfigBuilder and mark them as internal so we can minimize the chance.

Reply (Member Author): This is an interesting point. This would break our system, true. But this is our current point of communication towards the executors.

private[spark] val HADOOP_CONFIG_MAP_SPARK_CONF_NAME =
"spark.kubernetes.hadoop.executor.hadoopconfigmapname"

// Kerberos Configuration
private[spark] val HADOOP_KERBEROS_SECRET_NAME =
"spark.kubernetes.kerberos.dt"
private[spark] val HADOOP_KERBEROS_CONF_SECRET =
"spark.kubernetes.kerberos.secretname"
private[spark] val HADOOP_KERBEROS_CONF_ITEM_KEY =
"spark.kubernetes.kerberos.itemkeyname"
private[spark] val KERBEROS_SECRET_LABEL_PREFIX =
"hadoop-tokens"
private[spark] val SPARK_HADOOP_PREFIX = "spark.hadoop."
private[spark] val HADOOP_SECURITY_AUTHENTICATION =
SPARK_HADOOP_PREFIX + "hadoop.security.authentication"
// Bootstrapping dependencies via a secret
private[spark] val MOUNTED_SMALL_FILES_SECRET_MOUNT_PATH = "/etc/spark-submitted-files"

Expand Down