diff --git a/core/src/main/scala/org/apache/spark/deploy/DependencyUtils.scala b/core/src/main/scala/org/apache/spark/deploy/DependencyUtils.scala index fac834a70b893..178bdcfccb603 100644 --- a/core/src/main/scala/org/apache/spark/deploy/DependencyUtils.scala +++ b/core/src/main/scala/org/apache/spark/deploy/DependencyUtils.scala @@ -25,9 +25,10 @@ import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.{FileSystem, Path} import org.apache.spark.{SecurityManager, SparkConf, SparkException} +import org.apache.spark.internal.Logging import org.apache.spark.util.{MutableURLClassLoader, Utils} -private[deploy] object DependencyUtils { +private[deploy] object DependencyUtils extends Logging { def resolveMavenDependencies( packagesExclusions: String, @@ -75,7 +76,7 @@ private[deploy] object DependencyUtils { def addJarsToClassPath(jars: String, loader: MutableURLClassLoader): Unit = { if (jars != null) { for (jar <- jars.split(",")) { - SparkSubmit.addJarToClasspath(jar, loader) + addJarToClasspath(jar, loader) } } } @@ -151,6 +152,31 @@ private[deploy] object DependencyUtils { }.mkString(",") } + def addJarToClasspath(localJar: String, loader: MutableURLClassLoader): Unit = { + val uri = Utils.resolveURI(localJar) + uri.getScheme match { + case "file" | "local" => + val file = new File(uri.getPath) + if (file.exists()) { + loader.addURL(file.toURI.toURL) + } else { + logWarning(s"Local jar $file does not exist, skipping.") + } + case _ => + logWarning(s"Skip remote jar $uri.") + } + } + + /** + * Merge a sequence of comma-separated file lists, some of which may be null to indicate + * no files, into a single comma-separated string. + */ + def mergeFileLists(lists: String*): String = { + val merged = lists.filterNot(StringUtils.isBlank) + .flatMap(Utils.stringToSeq) + if (merged.nonEmpty) merged.mkString(",") else null + } + private def splitOnFragment(path: String): (URI, Option[String]) = { val uri = Utils.resolveURI(path) val withoutFragment = new URI(uri.getScheme, uri.getSchemeSpecificPart, null) diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala index eddbedeb1024d..427c797755b84 100644 --- a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala +++ b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala @@ -58,7 +58,7 @@ import org.apache.spark.util._ */ private[deploy] object SparkSubmitAction extends Enumeration { type SparkSubmitAction = Value - val SUBMIT, KILL, REQUEST_STATUS = Value + val SUBMIT, KILL, REQUEST_STATUS, PRINT_VERSION = Value } /** @@ -67,78 +67,32 @@ private[deploy] object SparkSubmitAction extends Enumeration { * This program handles setting up the classpath with relevant Spark dependencies and provides * a layer over the different cluster managers and deploy modes that Spark supports. */ -object SparkSubmit extends CommandLineUtils with Logging { +private[spark] class SparkSubmit extends Logging { import DependencyUtils._ + import SparkSubmit._ - // Cluster managers - private val YARN = 1 - private val STANDALONE = 2 - private val MESOS = 4 - private val LOCAL = 8 - private val KUBERNETES = 16 - private val ALL_CLUSTER_MGRS = YARN | STANDALONE | MESOS | LOCAL | KUBERNETES - - // Deploy modes - private val CLIENT = 1 - private val CLUSTER = 2 - private val ALL_DEPLOY_MODES = CLIENT | CLUSTER - - // Special primary resource names that represent shells rather than application jars. 
- private val SPARK_SHELL = "spark-shell" - private val PYSPARK_SHELL = "pyspark-shell" - private val SPARKR_SHELL = "sparkr-shell" - private val SPARKR_PACKAGE_ARCHIVE = "sparkr.zip" - private val R_PACKAGE_ARCHIVE = "rpkg.zip" - - private val CLASS_NOT_FOUND_EXIT_STATUS = 101 - - // Following constants are visible for testing. - private[deploy] val YARN_CLUSTER_SUBMIT_CLASS = - "org.apache.spark.deploy.yarn.YarnClusterApplication" - private[deploy] val REST_CLUSTER_SUBMIT_CLASS = classOf[RestSubmissionClientApp].getName() - private[deploy] val STANDALONE_CLUSTER_SUBMIT_CLASS = classOf[ClientApp].getName() - private[deploy] val KUBERNETES_CLUSTER_SUBMIT_CLASS = - "org.apache.spark.deploy.k8s.submit.KubernetesClientApplication" - - // scalastyle:off println - private[spark] def printVersionAndExit(): Unit = { - printStream.println("""Welcome to - ____ __ - / __/__ ___ _____/ /__ - _\ \/ _ \/ _ `/ __/ '_/ - /___/ .__/\_,_/_/ /_/\_\ version %s - /_/ - """.format(SPARK_VERSION)) - printStream.println("Using Scala %s, %s, %s".format( - Properties.versionString, Properties.javaVmName, Properties.javaVersion)) - printStream.println("Branch %s".format(SPARK_BRANCH)) - printStream.println("Compiled by user %s on %s".format(SPARK_BUILD_USER, SPARK_BUILD_DATE)) - printStream.println("Revision %s".format(SPARK_REVISION)) - printStream.println("Url %s".format(SPARK_REPO_URL)) - printStream.println("Type --help for more information.") - exitFn(0) - } - // scalastyle:on println - - override def main(args: Array[String]): Unit = { + def doSubmit(args: Array[String]): Unit = { // Initialize logging if it hasn't been done yet. Keep track of whether logging needs to // be reset before the application starts. val uninitLog = initializeLogIfNecessary(true, silent = true) - val appArgs = new SparkSubmitArguments(args) + val appArgs = parseArguments(args) if (appArgs.verbose) { - // scalastyle:off println - printStream.println(appArgs) - // scalastyle:on println + logInfo(appArgs.toString) } appArgs.action match { case SparkSubmitAction.SUBMIT => submit(appArgs, uninitLog) case SparkSubmitAction.KILL => kill(appArgs) case SparkSubmitAction.REQUEST_STATUS => requestStatus(appArgs) + case SparkSubmitAction.PRINT_VERSION => printVersion() } } + protected def parseArguments(args: Array[String]): SparkSubmitArguments = { + new SparkSubmitArguments(args) + } + /** * Kill an existing submission using the REST protocol. Standalone and Mesos cluster mode only. */ @@ -156,6 +110,24 @@ object SparkSubmit extends CommandLineUtils with Logging { .requestSubmissionStatus(args.submissionToRequestStatusFor) } + /** Print version information to the log. */ + private def printVersion(): Unit = { + logInfo("""Welcome to + ____ __ + / __/__ ___ _____/ /__ + _\ \/ _ \/ _ `/ __/ '_/ + /___/ .__/\_,_/_/ /_/\_\ version %s + /_/ + """.format(SPARK_VERSION)) + logInfo("Using Scala %s, %s, %s".format( + Properties.versionString, Properties.javaVmName, Properties.javaVersion)) + logInfo(s"Branch $SPARK_BRANCH") + logInfo(s"Compiled by user $SPARK_BUILD_USER on $SPARK_BUILD_DATE") + logInfo(s"Revision $SPARK_REVISION") + logInfo(s"Url $SPARK_REPO_URL") + logInfo("Type --help for more information.") + } + /** * Submit the application using the provided parameters. * @@ -185,10 +157,7 @@ object SparkSubmit extends CommandLineUtils with Logging { // makes the message printed to the output by the JVM not very helpful. Instead, // detect exceptions with empty stack traces here, and treat them differently. 
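Aside (not part of the patch): for context on the empty-stack-trace branch that follows, a JVM throwable constructed with writableStackTrace = false reports a zero-length stack trace, which is the effect Hadoop's AuthorizationException has and what this check keys on. A minimal Scala illustration (QuietException is a hypothetical name used only in this sketch):

    // A throwable that suppresses its own stack trace, mimicking the
    // AuthorizationException behavior the comment above describes.
    class QuietException(msg: String)
      extends RuntimeException(msg, null, false, false)

    val e = new QuietException("not authorized")
    assert(e.getStackTrace.length == 0)  // the condition detected below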
if (e.getStackTrace().length == 0) {
-              // scalastyle:off println
-              printStream.println(s"ERROR: ${e.getClass().getName()}: ${e.getMessage()}")
-              // scalastyle:on println
-              exitFn(1)
+              error(s"ERROR: ${e.getClass().getName()}: ${e.getMessage()}")
            } else {
              throw e
            }
@@ -210,14 +179,11 @@ object SparkSubmit extends CommandLineUtils with Logging {
     // to use the legacy gateway if the master endpoint turns out to be not a REST server.
     if (args.isStandaloneCluster && args.useRest) {
       try {
-        // scalastyle:off println
-        printStream.println("Running Spark using the REST application submission protocol.")
-        // scalastyle:on println
+        logInfo("Running Spark using the REST application submission protocol.")
         doRunMain()
       } catch {
         // Fail over to use the legacy submission gateway
         case e: SubmitRestConnectionException =>
-          printWarning(s"Master endpoint ${args.master} was not a REST server. " +
+          logWarning(s"Master endpoint ${args.master} was not a REST server. " +
             "Falling back to legacy submission gateway instead.")
           args.useRest = false
           submit(args, false)
@@ -245,19 +211,6 @@ object SparkSubmit extends CommandLineUtils with Logging {
       args: SparkSubmitArguments,
       conf: Option[HadoopConfiguration] = None)
       : (Seq[String], Seq[String], SparkConf, String) = {
-    try {
-      doPrepareSubmitEnvironment(args, conf)
-    } catch {
-      case e: SparkException =>
-        printErrorAndExit(e.getMessage)
-        throw e
-    }
-  }
-
-  private def doPrepareSubmitEnvironment(
-      args: SparkSubmitArguments,
-      conf: Option[HadoopConfiguration] = None)
-      : (Seq[String], Seq[String], SparkConf, String) = {
     // Return values
     val childArgs = new ArrayBuffer[String]()
     val childClasspath = new ArrayBuffer[String]()
@@ -268,7 +221,7 @@ object SparkSubmit extends CommandLineUtils with Logging {
     val clusterManager: Int = args.master match {
       case "yarn" => YARN
       case "yarn-client" | "yarn-cluster" =>
-        printWarning(s"Master ${args.master} is deprecated since 2.0." +
+        logWarning(s"Master ${args.master} is deprecated since 2.0." +
+ " Please use master \"yarn\" with specified deploy mode instead.") YARN case m if m.startsWith("spark") => STANDALONE @@ -276,7 +229,7 @@ object SparkSubmit extends CommandLineUtils with Logging { case m if m.startsWith("k8s") => KUBERNETES case m if m.startsWith("local") => LOCAL case _ => - printErrorAndExit("Master must either be yarn or start with spark, mesos, k8s, or local") + error("Master must either be yarn or start with spark, mesos, k8s, or local") -1 } @@ -284,7 +237,9 @@ object SparkSubmit extends CommandLineUtils with Logging { var deployMode: Int = args.deployMode match { case "client" | null => CLIENT case "cluster" => CLUSTER - case _ => printErrorAndExit("Deploy mode must be either client or cluster"); -1 + case _ => + error("Deploy mode must be either client or cluster") + -1 } // Because the deprecated way of specifying "yarn-cluster" and "yarn-client" encapsulate both @@ -296,16 +251,16 @@ object SparkSubmit extends CommandLineUtils with Logging { deployMode = CLUSTER args.master = "yarn" case ("yarn-cluster", "client") => - printErrorAndExit("Client deploy mode is not compatible with master \"yarn-cluster\"") + error("Client deploy mode is not compatible with master \"yarn-cluster\"") case ("yarn-client", "cluster") => - printErrorAndExit("Cluster deploy mode is not compatible with master \"yarn-client\"") + error("Cluster deploy mode is not compatible with master \"yarn-client\"") case (_, mode) => args.master = "yarn" } // Make sure YARN is included in our build if we're trying to use it if (!Utils.classIsLoadable(YARN_CLUSTER_SUBMIT_CLASS) && !Utils.isTesting) { - printErrorAndExit( + error( "Could not load YARN classes. " + "This copy of Spark may not have been compiled with YARN support.") } @@ -315,7 +270,7 @@ object SparkSubmit extends CommandLineUtils with Logging { args.master = Utils.checkAndGetK8sMasterUrl(args.master) // Make sure KUBERNETES is included in our build if we're trying to use it if (!Utils.classIsLoadable(KUBERNETES_CLUSTER_SUBMIT_CLASS) && !Utils.isTesting) { - printErrorAndExit( + error( "Could not load KUBERNETES classes. 
" + "This copy of Spark may not have been compiled with KUBERNETES support.") } @@ -324,23 +279,23 @@ object SparkSubmit extends CommandLineUtils with Logging { // Fail fast, the following modes are not supported or applicable (clusterManager, deployMode) match { case (STANDALONE, CLUSTER) if args.isPython => - printErrorAndExit("Cluster deploy mode is currently not supported for python " + + error("Cluster deploy mode is currently not supported for python " + "applications on standalone clusters.") case (STANDALONE, CLUSTER) if args.isR => - printErrorAndExit("Cluster deploy mode is currently not supported for R " + + error("Cluster deploy mode is currently not supported for R " + "applications on standalone clusters.") case (KUBERNETES, _) if args.isPython => - printErrorAndExit("Python applications are currently not supported for Kubernetes.") + error("Python applications are currently not supported for Kubernetes.") case (KUBERNETES, _) if args.isR => - printErrorAndExit("R applications are currently not supported for Kubernetes.") + error("R applications are currently not supported for Kubernetes.") case (LOCAL, CLUSTER) => - printErrorAndExit("Cluster deploy mode is not compatible with master \"local\"") + error("Cluster deploy mode is not compatible with master \"local\"") case (_, CLUSTER) if isShell(args.primaryResource) => - printErrorAndExit("Cluster deploy mode is not applicable to Spark shells.") + error("Cluster deploy mode is not applicable to Spark shells.") case (_, CLUSTER) if isSqlShell(args.mainClass) => - printErrorAndExit("Cluster deploy mode is not applicable to Spark SQL shell.") + error("Cluster deploy mode is not applicable to Spark SQL shell.") case (_, CLUSTER) if isThriftServer(args.mainClass) => - printErrorAndExit("Cluster deploy mode is not applicable to Spark Thrift server.") + error("Cluster deploy mode is not applicable to Spark Thrift server.") case _ => } @@ -493,11 +448,11 @@ object SparkSubmit extends CommandLineUtils with Logging { if (args.isR && clusterManager == YARN) { val sparkRPackagePath = RUtils.localSparkRPackagePath if (sparkRPackagePath.isEmpty) { - printErrorAndExit("SPARK_HOME does not exist for R application in YARN mode.") + error("SPARK_HOME does not exist for R application in YARN mode.") } val sparkRPackageFile = new File(sparkRPackagePath.get, SPARKR_PACKAGE_ARCHIVE) if (!sparkRPackageFile.exists()) { - printErrorAndExit(s"$SPARKR_PACKAGE_ARCHIVE does not exist for R application in YARN mode.") + error(s"$SPARKR_PACKAGE_ARCHIVE does not exist for R application in YARN mode.") } val sparkRPackageURI = Utils.resolveURI(sparkRPackageFile.getAbsolutePath).toString @@ -510,7 +465,7 @@ object SparkSubmit extends CommandLineUtils with Logging { val rPackageFile = RPackageUtils.zipRLibraries(new File(RUtils.rPackages.get), R_PACKAGE_ARCHIVE) if (!rPackageFile.exists()) { - printErrorAndExit("Failed to zip all the built R packages.") + error("Failed to zip all the built R packages.") } val rPackageURI = Utils.resolveURI(rPackageFile.getAbsolutePath).toString @@ -521,12 +476,12 @@ object SparkSubmit extends CommandLineUtils with Logging { // TODO: Support distributing R packages with standalone cluster if (args.isR && clusterManager == STANDALONE && !RUtils.rPackages.isEmpty) { - printErrorAndExit("Distributing R packages with standalone cluster is not supported.") + error("Distributing R packages with standalone cluster is not supported.") } // TODO: Support distributing R packages with mesos cluster if (args.isR && clusterManager == MESOS 
&& !RUtils.rPackages.isEmpty) { - printErrorAndExit("Distributing R packages with mesos cluster is not supported.") + error("Distributing R packages with mesos cluster is not supported.") } // If we're running an R app, set the main class to our specific R runner @@ -799,9 +754,7 @@ object SparkSubmit extends CommandLineUtils with Logging { private def setRMPrincipal(sparkConf: SparkConf): Unit = { val shortUserName = UserGroupInformation.getCurrentUser.getShortUserName val key = s"spark.hadoop.${YarnConfiguration.RM_PRINCIPAL}" - // scalastyle:off println - printStream.println(s"Setting ${key} to ${shortUserName}") - // scalastyle:off println + logInfo(s"Setting ${key} to ${shortUserName}") sparkConf.set(key, shortUserName) } @@ -817,16 +770,14 @@ object SparkSubmit extends CommandLineUtils with Logging { sparkConf: SparkConf, childMainClass: String, verbose: Boolean): Unit = { - // scalastyle:off println if (verbose) { - printStream.println(s"Main class:\n$childMainClass") - printStream.println(s"Arguments:\n${childArgs.mkString("\n")}") + logInfo(s"Main class:\n$childMainClass") + logInfo(s"Arguments:\n${childArgs.mkString("\n")}") // sysProps may contain sensitive information, so redact before printing - printStream.println(s"Spark config:\n${Utils.redact(sparkConf.getAll.toMap).mkString("\n")}") - printStream.println(s"Classpath elements:\n${childClasspath.mkString("\n")}") - printStream.println("\n") + logInfo(s"Spark config:\n${Utils.redact(sparkConf.getAll.toMap).mkString("\n")}") + logInfo(s"Classpath elements:\n${childClasspath.mkString("\n")}") + logInfo("\n") } - // scalastyle:on println val loader = if (sparkConf.get(DRIVER_USER_CLASS_PATH_FIRST)) { @@ -848,23 +799,19 @@ object SparkSubmit extends CommandLineUtils with Logging { mainClass = Utils.classForName(childMainClass) } catch { case e: ClassNotFoundException => - e.printStackTrace(printStream) + logWarning(s"Failed to load $childMainClass.", e) if (childMainClass.contains("thriftserver")) { - // scalastyle:off println - printStream.println(s"Failed to load main class $childMainClass.") - printStream.println("You need to build Spark with -Phive and -Phive-thriftserver.") - // scalastyle:on println + logInfo(s"Failed to load main class $childMainClass.") + logInfo("You need to build Spark with -Phive and -Phive-thriftserver.") } - System.exit(CLASS_NOT_FOUND_EXIT_STATUS) + throw new SparkUserAppException(CLASS_NOT_FOUND_EXIT_STATUS) case e: NoClassDefFoundError => - e.printStackTrace(printStream) + logWarning(s"Failed to load $childMainClass: ${e.getMessage()}") if (e.getMessage.contains("org/apache/hadoop/hive")) { - // scalastyle:off println - printStream.println(s"Failed to load hive class.") - printStream.println("You need to build Spark with -Phive and -Phive-thriftserver.") - // scalastyle:on println + logInfo(s"Failed to load hive class.") + logInfo("You need to build Spark with -Phive and -Phive-thriftserver.") } - System.exit(CLASS_NOT_FOUND_EXIT_STATUS) + throw new SparkUserAppException(CLASS_NOT_FOUND_EXIT_STATUS) } val app: SparkApplication = if (classOf[SparkApplication].isAssignableFrom(mainClass)) { @@ -872,7 +819,7 @@ object SparkSubmit extends CommandLineUtils with Logging { } else { // SPARK-4170 if (classOf[scala.App].isAssignableFrom(mainClass)) { - printWarning("Subclasses of scala.App may not work correctly. Use a main() method instead.") + logWarning("Subclasses of scala.App may not work correctly. 
Use a main() method instead.") } new JavaMainApplication(mainClass) } @@ -891,29 +838,90 @@ object SparkSubmit extends CommandLineUtils with Logging { app.start(childArgs.toArray, sparkConf) } catch { case t: Throwable => - findCause(t) match { - case SparkUserAppException(exitCode) => - System.exit(exitCode) - - case t: Throwable => - throw t - } + throw findCause(t) } } - private[deploy] def addJarToClasspath(localJar: String, loader: MutableURLClassLoader) { - val uri = Utils.resolveURI(localJar) - uri.getScheme match { - case "file" | "local" => - val file = new File(uri.getPath) - if (file.exists()) { - loader.addURL(file.toURI.toURL) - } else { - printWarning(s"Local jar $file does not exist, skipping.") + /** Throw a SparkException with the given error message. */ + private def error(msg: String): Unit = throw new SparkException(msg) + +} + + +/** + * This entry point is used by the launcher library to start in-process Spark applications. + */ +private[spark] object InProcessSparkSubmit { + + def main(args: Array[String]): Unit = { + val submit = new SparkSubmit() + submit.doSubmit(args) + } + +} + +object SparkSubmit extends CommandLineUtils with Logging { + + // Cluster managers + private val YARN = 1 + private val STANDALONE = 2 + private val MESOS = 4 + private val LOCAL = 8 + private val KUBERNETES = 16 + private val ALL_CLUSTER_MGRS = YARN | STANDALONE | MESOS | LOCAL | KUBERNETES + + // Deploy modes + private val CLIENT = 1 + private val CLUSTER = 2 + private val ALL_DEPLOY_MODES = CLIENT | CLUSTER + + // Special primary resource names that represent shells rather than application jars. + private val SPARK_SHELL = "spark-shell" + private val PYSPARK_SHELL = "pyspark-shell" + private val SPARKR_SHELL = "sparkr-shell" + private val SPARKR_PACKAGE_ARCHIVE = "sparkr.zip" + private val R_PACKAGE_ARCHIVE = "rpkg.zip" + + private val CLASS_NOT_FOUND_EXIT_STATUS = 101 + + // Following constants are visible for testing. + private[deploy] val YARN_CLUSTER_SUBMIT_CLASS = + "org.apache.spark.deploy.yarn.YarnClusterApplication" + private[deploy] val REST_CLUSTER_SUBMIT_CLASS = classOf[RestSubmissionClientApp].getName() + private[deploy] val STANDALONE_CLUSTER_SUBMIT_CLASS = classOf[ClientApp].getName() + private[deploy] val KUBERNETES_CLUSTER_SUBMIT_CLASS = + "org.apache.spark.deploy.k8s.submit.KubernetesClientApplication" + + override def main(args: Array[String]): Unit = { + val submit = new SparkSubmit() { + self => + + override protected def parseArguments(args: Array[String]): SparkSubmitArguments = { + new SparkSubmitArguments(args) { + override protected def logInfo(msg: => String): Unit = self.logInfo(msg) + + override protected def logWarning(msg: => String): Unit = self.logWarning(msg) } - case _ => - printWarning(s"Skip remote jar $uri.") + } + + override protected def logInfo(msg: => String): Unit = printMessage(msg) + + override protected def logWarning(msg: => String): Unit = printMessage(s"Warning: $msg") + + override def doSubmit(args: Array[String]): Unit = { + try { + super.doSubmit(args) + } catch { + case e: SparkUserAppException => + exitFn(e.exitCode) + case e: SparkException => + printErrorAndExit(e.getMessage()) + } + } + } + + submit.doSubmit(args) } /** @@ -962,17 +970,6 @@ object SparkSubmit extends CommandLineUtils with Logging { res == SparkLauncher.NO_RESOURCE } - /** - * Merge a sequence of comma-separated file lists, some of which may be null to indicate - * no files, into a single comma-separated string. 
- */ - private[deploy] def mergeFileLists(lists: String*): String = { - val merged = lists.filterNot(StringUtils.isBlank) - .flatMap(_.split(",")) - .mkString(",") - if (merged == "") null else merged - } - } /** Provides utility functions to be used inside SparkSubmit. */ @@ -1000,12 +997,12 @@ private[spark] object SparkSubmitUtils { override def toString: String = s"$groupId:$artifactId:$version" } -/** - * Extracts maven coordinates from a comma-delimited string. Coordinates should be provided - * in the format `groupId:artifactId:version` or `groupId/artifactId:version`. - * @param coordinates Comma-delimited string of maven coordinates - * @return Sequence of Maven coordinates - */ + /** + * Extracts maven coordinates from a comma-delimited string. Coordinates should be provided + * in the format `groupId:artifactId:version` or `groupId/artifactId:version`. + * @param coordinates Comma-delimited string of maven coordinates + * @return Sequence of Maven coordinates + */ def extractMavenCoordinates(coordinates: String): Seq[MavenCoordinate] = { coordinates.split(",").map { p => val splits = p.replace("/", ":").split(":") @@ -1304,6 +1301,13 @@ private[spark] object SparkSubmitUtils { rule } + def parseSparkConfProperty(pair: String): (String, String) = { + pair.split("=", 2).toSeq match { + case Seq(k, v) => (k, v) + case _ => throw new SparkException(s"Spark config without '=': $pair") + } + } + } /** diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala b/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala index 8e7070593687b..0733fdb72cafb 100644 --- a/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala +++ b/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala @@ -29,7 +29,9 @@ import scala.collection.mutable.{ArrayBuffer, HashMap} import scala.io.Source import scala.util.Try +import org.apache.spark.{SparkException, SparkUserAppException} import org.apache.spark.deploy.SparkSubmitAction._ +import org.apache.spark.internal.Logging import org.apache.spark.launcher.SparkSubmitArgumentsParser import org.apache.spark.network.util.JavaUtils import org.apache.spark.util.Utils @@ -40,7 +42,7 @@ import org.apache.spark.util.Utils * The env argument is used for testing. */ private[deploy] class SparkSubmitArguments(args: Seq[String], env: Map[String, String] = sys.env) - extends SparkSubmitArgumentsParser { + extends SparkSubmitArgumentsParser with Logging { var master: String = null var deployMode: String = null var executorMemory: String = null @@ -85,8 +87,9 @@ private[deploy] class SparkSubmitArguments(args: Seq[String], env: Map[String, S /** Default properties present in the currently defined defaults file. 
*/ lazy val defaultSparkProperties: HashMap[String, String] = { val defaultProperties = new HashMap[String, String]() - // scalastyle:off println - if (verbose) SparkSubmit.printStream.println(s"Using properties file: $propertiesFile") + if (verbose) { + logInfo(s"Using properties file: $propertiesFile") + } Option(propertiesFile).foreach { filename => val properties = Utils.getPropertiesFromFile(filename) properties.foreach { case (k, v) => @@ -95,21 +98,16 @@ private[deploy] class SparkSubmitArguments(args: Seq[String], env: Map[String, S // Property files may contain sensitive information, so redact before printing if (verbose) { Utils.redact(properties).foreach { case (k, v) => - SparkSubmit.printStream.println(s"Adding default property: $k=$v") + logInfo(s"Adding default property: $k=$v") } } } - // scalastyle:on println defaultProperties } // Set parameters from command line arguments - try { - parse(args.asJava) - } catch { - case e: IllegalArgumentException => - SparkSubmit.printErrorAndExit(e.getMessage()) - } + parse(args.asJava) + // Populate `sparkProperties` map from properties file mergeDefaultSparkProperties() // Remove keys that don't start with "spark." from `sparkProperties`. @@ -141,7 +139,7 @@ private[deploy] class SparkSubmitArguments(args: Seq[String], env: Map[String, S sparkProperties.foreach { case (k, v) => if (!k.startsWith("spark.")) { sparkProperties -= k - SparkSubmit.printWarning(s"Ignoring non-spark config property: $k=$v") + logWarning(s"Ignoring non-spark config property: $k=$v") } } } @@ -215,10 +213,10 @@ private[deploy] class SparkSubmitArguments(args: Seq[String], env: Map[String, S } } catch { case _: Exception => - SparkSubmit.printErrorAndExit(s"Cannot load main class from JAR $primaryResource") + error(s"Cannot load main class from JAR $primaryResource") } case _ => - SparkSubmit.printErrorAndExit( + error( s"Cannot load main class from JAR $primaryResource with URI $uriScheme. 
" + "Please specify a class through --class.") } @@ -248,6 +246,7 @@ private[deploy] class SparkSubmitArguments(args: Seq[String], env: Map[String, S case SUBMIT => validateSubmitArguments() case KILL => validateKillArguments() case REQUEST_STATUS => validateStatusRequestArguments() + case PRINT_VERSION => } } @@ -256,62 +255,61 @@ private[deploy] class SparkSubmitArguments(args: Seq[String], env: Map[String, S printUsageAndExit(-1) } if (primaryResource == null) { - SparkSubmit.printErrorAndExit("Must specify a primary resource (JAR or Python or R file)") + error("Must specify a primary resource (JAR or Python or R file)") } if (mainClass == null && SparkSubmit.isUserJar(primaryResource)) { - SparkSubmit.printErrorAndExit("No main class set in JAR; please specify one with --class") + error("No main class set in JAR; please specify one with --class") } if (driverMemory != null && Try(JavaUtils.byteStringAsBytes(driverMemory)).getOrElse(-1L) <= 0) { - SparkSubmit.printErrorAndExit("Driver Memory must be a positive number") + error("Driver memory must be a positive number") } if (executorMemory != null && Try(JavaUtils.byteStringAsBytes(executorMemory)).getOrElse(-1L) <= 0) { - SparkSubmit.printErrorAndExit("Executor Memory cores must be a positive number") + error("Executor memory must be a positive number") } if (executorCores != null && Try(executorCores.toInt).getOrElse(-1) <= 0) { - SparkSubmit.printErrorAndExit("Executor cores must be a positive number") + error("Executor cores must be a positive number") } if (totalExecutorCores != null && Try(totalExecutorCores.toInt).getOrElse(-1) <= 0) { - SparkSubmit.printErrorAndExit("Total executor cores must be a positive number") + error("Total executor cores must be a positive number") } if (numExecutors != null && Try(numExecutors.toInt).getOrElse(-1) <= 0) { - SparkSubmit.printErrorAndExit("Number of executors must be a positive number") + error("Number of executors must be a positive number") } if (pyFiles != null && !isPython) { - SparkSubmit.printErrorAndExit("--py-files given but primary resource is not a Python script") + error("--py-files given but primary resource is not a Python script") } if (master.startsWith("yarn")) { val hasHadoopEnv = env.contains("HADOOP_CONF_DIR") || env.contains("YARN_CONF_DIR") if (!hasHadoopEnv && !Utils.isTesting) { - throw new Exception(s"When running with master '$master' " + + error(s"When running with master '$master' " + "either HADOOP_CONF_DIR or YARN_CONF_DIR must be set in the environment.") } } if (proxyUser != null && principal != null) { - SparkSubmit.printErrorAndExit("Only one of --proxy-user or --principal can be provided.") + error("Only one of --proxy-user or --principal can be provided.") } } private def validateKillArguments(): Unit = { if (!master.startsWith("spark://") && !master.startsWith("mesos://")) { - SparkSubmit.printErrorAndExit( - "Killing submissions is only supported in standalone or Mesos mode!") + error("Killing submissions is only supported in standalone or Mesos mode!") } if (submissionToKill == null) { - SparkSubmit.printErrorAndExit("Please specify a submission to kill.") + error("Please specify a submission to kill.") } } private def validateStatusRequestArguments(): Unit = { if (!master.startsWith("spark://") && !master.startsWith("mesos://")) { - SparkSubmit.printErrorAndExit( + error( "Requesting submission statuses is only supported in standalone or Mesos mode!") } if (submissionToRequestStatusFor == null) { - SparkSubmit.printErrorAndExit("Please specify a 
submission to request status for.") + error("Please specify a submission to request status for.") } } @@ -368,7 +366,7 @@ private[deploy] class SparkSubmitArguments(args: Seq[String], env: Map[String, S case DEPLOY_MODE => if (value != "client" && value != "cluster") { - SparkSubmit.printErrorAndExit("--deploy-mode must be either \"client\" or \"cluster\"") + error("--deploy-mode must be either \"client\" or \"cluster\"") } deployMode = value @@ -405,14 +403,14 @@ private[deploy] class SparkSubmitArguments(args: Seq[String], env: Map[String, S case KILL_SUBMISSION => submissionToKill = value if (action != null) { - SparkSubmit.printErrorAndExit(s"Action cannot be both $action and $KILL.") + error(s"Action cannot be both $action and $KILL.") } action = KILL case STATUS => submissionToRequestStatusFor = value if (action != null) { - SparkSubmit.printErrorAndExit(s"Action cannot be both $action and $REQUEST_STATUS.") + error(s"Action cannot be both $action and $REQUEST_STATUS.") } action = REQUEST_STATUS @@ -444,7 +442,7 @@ private[deploy] class SparkSubmitArguments(args: Seq[String], env: Map[String, S repositories = value case CONF => - val (confName, confValue) = SparkSubmit.parseSparkConfProperty(value) + val (confName, confValue) = SparkSubmitUtils.parseSparkConfProperty(value) sparkProperties(confName) = confValue case PROXY_USER => @@ -463,15 +461,15 @@ private[deploy] class SparkSubmitArguments(args: Seq[String], env: Map[String, S verbose = true case VERSION => - SparkSubmit.printVersionAndExit() + action = SparkSubmitAction.PRINT_VERSION case USAGE_ERROR => printUsageAndExit(1) case _ => - throw new IllegalArgumentException(s"Unexpected argument '$opt'.") + error(s"Unexpected argument '$opt'.") } - true + action != SparkSubmitAction.PRINT_VERSION } /** @@ -482,7 +480,7 @@ private[deploy] class SparkSubmitArguments(args: Seq[String], env: Map[String, S */ override protected def handleUnknown(opt: String): Boolean = { if (opt.startsWith("-")) { - SparkSubmit.printErrorAndExit(s"Unrecognized option '$opt'.") + error(s"Unrecognized option '$opt'.") } primaryResource = @@ -501,20 +499,18 @@ private[deploy] class SparkSubmitArguments(args: Seq[String], env: Map[String, S } private def printUsageAndExit(exitCode: Int, unknownParam: Any = null): Unit = { - // scalastyle:off println - val outStream = SparkSubmit.printStream if (unknownParam != null) { - outStream.println("Unknown/unsupported param " + unknownParam) + logInfo("Unknown/unsupported param " + unknownParam) } val command = sys.env.get("_SPARK_CMD_USAGE").getOrElse( """Usage: spark-submit [options] [app arguments] |Usage: spark-submit --kill [submission ID] --master [spark://...] |Usage: spark-submit --status [submission ID] --master [spark://...] 
|Usage: spark-submit run-example [options] example-class [example args]""".stripMargin) - outStream.println(command) + logInfo(command) val mem_mb = Utils.DEFAULT_DRIVER_MEM_MB - outStream.println( + logInfo( s""" |Options: | --master MASTER_URL spark://host:port, mesos://host:port, yarn, @@ -596,12 +592,11 @@ private[deploy] class SparkSubmitArguments(args: Seq[String], env: Map[String, S ) if (SparkSubmit.isSqlShell(mainClass)) { - outStream.println("CLI options:") - outStream.println(getSqlShellOptions()) + logInfo("CLI options:") + logInfo(getSqlShellOptions()) } - // scalastyle:on println - SparkSubmit.exitFn(exitCode) + throw new SparkUserAppException(exitCode) } /** @@ -655,4 +650,7 @@ private[deploy] class SparkSubmitArguments(args: Seq[String], env: Map[String, S System.setErr(currentErr) } } + + private def error(msg: String): Unit = throw new SparkException(msg) + } diff --git a/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala b/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala index ace6d9e00c838..56db9359e033f 100644 --- a/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala +++ b/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala @@ -18,12 +18,13 @@ package org.apache.spark.deploy.history import java.io.{File, FileNotFoundException, IOException} -import java.util.{Date, ServiceLoader, UUID} +import java.util.{Date, ServiceLoader} import java.util.concurrent.{ExecutorService, TimeUnit} import java.util.zip.{ZipEntry, ZipOutputStream} import scala.collection.JavaConverters._ import scala.collection.mutable +import scala.io.Source import scala.util.Try import scala.xml.Node @@ -58,10 +59,10 @@ import org.apache.spark.util.kvstore._ * * == How new and updated attempts are detected == * - * - New attempts are detected in [[checkForLogs]]: the log dir is scanned, and any - * entries in the log dir whose modification time is greater than the last scan time - * are considered new or updated. These are replayed to create a new attempt info entry - * and update or create a matching application info element in the list of applications. + * - New attempts are detected in [[checkForLogs]]: the log dir is scanned, and any entries in the + * log dir whose size changed since the last scan time are considered new or updated. These are + * replayed to create a new attempt info entry and update or create a matching application info + * element in the list of applications. * - Updated attempts are also found in [[checkForLogs]] -- if the attempt's log file has grown, the * attempt is replaced by another one with a larger log size. * @@ -125,6 +126,7 @@ private[history] class FsHistoryProvider(conf: SparkConf, clock: Clock) private val pendingReplayTasksCount = new java.util.concurrent.atomic.AtomicInteger(0) private val storePath = conf.get(LOCAL_STORE_DIR).map(new File(_)) + private val fastInProgressParsing = conf.get(FAST_IN_PROGRESS_PARSING) // Visible for testing. 
private[history] val listing: KVStore = storePath.map { path => @@ -402,13 +404,13 @@ private[history] class FsHistoryProvider(conf: SparkConf, clock: Clock) */ private[history] def checkForLogs(): Unit = { try { - val newLastScanTime = getNewLastScanTime() + val newLastScanTime = clock.getTimeMillis() logDebug(s"Scanning $logDir with lastScanTime==$lastScanTime") val updated = Option(fs.listStatus(new Path(logDir))).map(_.toSeq).getOrElse(Nil) .filter { entry => !entry.isDirectory() && - // FsHistoryProvider generates a hidden file which can't be read. Accidentally + // FsHistoryProvider used to generate a hidden file which can't be read. Accidentally // reading a garbage file is safe, but we would log an error which can be scary to // the end-user. !entry.getPath().getName().startsWith(".") && @@ -417,15 +419,24 @@ private[history] class FsHistoryProvider(conf: SparkConf, clock: Clock) .filter { entry => try { val info = listing.read(classOf[LogInfo], entry.getPath().toString()) - if (info.fileSize < entry.getLen()) { - // Log size has changed, it should be parsed. - true - } else { + + if (info.appId.isDefined) { // If the SHS view has a valid application, update the time the file was last seen so - // that the entry is not deleted from the SHS listing. - if (info.appId.isDefined) { - listing.write(info.copy(lastProcessed = newLastScanTime)) + // that the entry is not deleted from the SHS listing. Also update the file size, in + // case the code below decides we don't need to parse the log. + listing.write(info.copy(lastProcessed = newLastScanTime, fileSize = entry.getLen())) + } + + if (info.fileSize < entry.getLen()) { + if (info.appId.isDefined && fastInProgressParsing) { + // When fast in-progress parsing is on, we don't need to re-parse when the + // size changes, but we do need to invalidate any existing UIs. + invalidateUI(info.appId.get, info.attemptId) + false + } else { + true } + } else { false } } catch { @@ -449,7 +460,7 @@ private[history] class FsHistoryProvider(conf: SparkConf, clock: Clock) val tasks = updated.map { entry => try { replayExecutor.submit(new Runnable { - override def run(): Unit = mergeApplicationListing(entry, newLastScanTime) + override def run(): Unit = mergeApplicationListing(entry, newLastScanTime, true) }) } catch { // let the iteration over the updated entries break, since an exception on @@ -542,25 +553,6 @@ private[history] class FsHistoryProvider(conf: SparkConf, clock: Clock) } } - private[history] def getNewLastScanTime(): Long = { - val fileName = "." + UUID.randomUUID().toString - val path = new Path(logDir, fileName) - val fos = fs.create(path) - - try { - fos.close() - fs.getFileStatus(path).getModificationTime - } catch { - case e: Exception => - logError("Exception encountered when attempting to update last scan time", e) - lastScanTime.get() - } finally { - if (!fs.delete(path, true)) { - logWarning(s"Error deleting ${path}") - } - } - } - override def writeEventLogs( appId: String, attemptId: Option[String], @@ -607,7 +599,10 @@ private[history] class FsHistoryProvider(conf: SparkConf, clock: Clock) /** * Replay the given log file, saving the application in the listing db. 
 */
-  protected def mergeApplicationListing(fileStatus: FileStatus, scanTime: Long): Unit = {
+  protected def mergeApplicationListing(
+      fileStatus: FileStatus,
+      scanTime: Long,
+      enableOptimizations: Boolean): Unit = {
     val eventsFilter: ReplayEventsFilter = { eventString =>
       eventString.startsWith(APPL_START_EVENT_PREFIX) ||
       eventString.startsWith(APPL_END_EVENT_PREFIX) ||
@@ -616,32 +611,118 @@
     }
 
     val logPath = fileStatus.getPath()
+    val appCompleted = isCompleted(logPath.getName())
+    val reparseChunkSize = conf.get(END_EVENT_REPARSE_CHUNK_SIZE)
+
+    // Enable halt support in listener if:
+    // - app in progress && fast parsing enabled
+    // - skipping to end event is enabled (regardless of in-progress state)
+    val shouldHalt = enableOptimizations &&
+      ((!appCompleted && fastInProgressParsing) || reparseChunkSize > 0)
+
     val bus = new ReplayListenerBus()
-    val listener = new AppListingListener(fileStatus, clock)
+    val listener = new AppListingListener(fileStatus, clock, shouldHalt)
     bus.addListener(listener)
-    replay(fileStatus, bus, eventsFilter = eventsFilter)
-
-    val (appId, attemptId) = listener.applicationInfo match {
-      case Some(app) =>
-        // Invalidate the existing UI for the reloaded app attempt, if any. See LoadedAppUI for a
-        // discussion on the UI lifecycle.
-        synchronized {
-          activeUIs.get((app.info.id, app.attempts.head.info.attemptId)).foreach { ui =>
-            ui.invalidate()
-            ui.ui.store.close()
+
+    logInfo(s"Parsing $logPath for listing data...")
+    Utils.tryWithResource(EventLoggingListener.openEventLog(logPath, fs)) { in =>
+      bus.replay(in, logPath.toString, !appCompleted, eventsFilter)
+    }
+
+    // If enabled above, the listing listener will halt parsing when there's enough information to
+    // create a listing entry. When the app is completed, or fast parsing is disabled, we still need
+    // to replay until the end of the log file to try to find the app end event. Instead of reading
+    // and parsing line by line, this code skips bytes from the underlying stream so that it is
+    // positioned somewhere close to the end of the log file.
+    //
+    // Because the application end event is written while some Spark subsystems such as the
+    // scheduler are still active, there is no guarantee that the end event will be the last
+    // in the log. So, to be safe, the code uses a configurable chunk to be re-parsed at
+    // the end of the file, and retries parsing the whole log later if the needed data is
+    // still not found.
+    //
+    // Note that skipping bytes in compressed files is still not cheap, but there are still some
+    // minor gains over the normal log parsing done by the replay bus.
+    //
+    // This code re-opens the file so that it knows where it's skipping to. This isn't as cheap as
+    // just skipping from the current position, but there isn't a good way to detect what the
+    // current position is, since the replay listener bus buffers data internally.
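Aside (not part of the patch): the skip loop added below is shaped the way it is because java.io.InputStream.skip() may legally skip fewer bytes than requested, so callers must loop until the target offset is reached. A standalone sketch of the same pattern, with an extra guard for streams that cannot skip (skipFully is an illustrative helper, not code from this change):

    import java.io.InputStream

    // Keep calling skip() until `target` bytes are consumed; a single call
    // may skip fewer bytes than asked for, or zero at end of stream.
    def skipFully(in: InputStream, target: Long): Unit = {
      var skipped = 0L
      while (skipped < target) {
        val n = in.skip(target - skipped)
        if (n <= 0) return  // EOF or non-skippable stream; stop early
        skipped += n
      }
    }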
+ val lookForEndEvent = shouldHalt && (appCompleted || !fastInProgressParsing) + if (lookForEndEvent && listener.applicationInfo.isDefined) { + Utils.tryWithResource(EventLoggingListener.openEventLog(logPath, fs)) { in => + val target = fileStatus.getLen() - reparseChunkSize + if (target > 0) { + logInfo(s"Looking for end event; skipping $target bytes from $logPath...") + var skipped = 0L + while (skipped < target) { + skipped += in.skip(target - skipped) } } + val source = Source.fromInputStream(in).getLines() + + // Because skipping may leave the stream in the middle of a line, read the next line + // before replaying. + if (target > 0) { + source.next() + } + + bus.replay(source, logPath.toString, !appCompleted, eventsFilter) + } + } + + logInfo(s"Finished parsing $logPath") + + listener.applicationInfo match { + case Some(app) if !lookForEndEvent || app.attempts.head.info.completed => + // In this case, we either didn't care about the end event, or we found it. So the + // listing data is good. + invalidateUI(app.info.id, app.attempts.head.info.attemptId) addListing(app) - (Some(app.info.id), app.attempts.head.info.attemptId) + listing.write(LogInfo(logPath.toString(), scanTime, Some(app.info.id), + app.attempts.head.info.attemptId, fileStatus.getLen())) + + // For a finished log, remove the corresponding "in progress" entry from the listing DB if + // the file is really gone. + if (appCompleted) { + val inProgressLog = logPath.toString() + EventLoggingListener.IN_PROGRESS + try { + // Fetch the entry first to avoid an RPC when it's already removed. + listing.read(classOf[LogInfo], inProgressLog) + if (!fs.isFile(new Path(inProgressLog))) { + listing.delete(classOf[LogInfo], inProgressLog) + } + } catch { + case _: NoSuchElementException => + } + } + + case Some(_) => + // In this case, the attempt is still not marked as finished but was expected to. This can + // mean the end event is before the configured threshold, so call the method again to + // re-parse the whole log. + logInfo(s"Reparsing $logPath since end event was not found.") + mergeApplicationListing(fileStatus, scanTime, false) case _ => // If the app hasn't written down its app ID to the logs, still record the entry in the // listing db, with an empty ID. This will make the log eligible for deletion if the app // does not make progress after the configured max log age. - (None, None) + listing.write(LogInfo(logPath.toString(), scanTime, None, None, fileStatus.getLen())) + } + } + + /** + * Invalidate an existing UI for a given app attempt. See LoadedAppUI for a discussion on the + * UI lifecycle. + */ + private def invalidateUI(appId: String, attemptId: Option[String]): Unit = { + synchronized { + activeUIs.get((appId, attemptId)).foreach { ui => + ui.invalidate() + ui.ui.store.close() + } } - listing.write(LogInfo(logPath.toString(), scanTime, appId, attemptId, fileStatus.getLen())) } /** @@ -696,29 +777,6 @@ private[history] class FsHistoryProvider(conf: SparkConf, clock: Clock) } } - /** - * Replays the events in the specified log file on the supplied `ReplayListenerBus`. - * `ReplayEventsFilter` determines what events are replayed. 
- */ - private def replay( - eventLog: FileStatus, - bus: ReplayListenerBus, - eventsFilter: ReplayEventsFilter = SELECT_ALL_FILTER): Unit = { - val logPath = eventLog.getPath() - val isCompleted = !logPath.getName().endsWith(EventLoggingListener.IN_PROGRESS) - logInfo(s"Replaying log path: $logPath") - // Note that the eventLog may have *increased* in size since when we grabbed the filestatus, - // and when we read the file here. That is OK -- it may result in an unnecessary refresh - // when there is no update, but will not result in missing an update. We *must* prevent - // an error the other way -- if we report a size bigger (ie later) than the file that is - // actually read, we may never refresh the app. FileStatus is guaranteed to be static - // after it's created, so we get a file size that is no bigger than what is actually read. - Utils.tryWithResource(EventLoggingListener.openEventLog(logPath, fs)) { in => - bus.replay(in, logPath.toString, !isCompleted, eventsFilter) - logInfo(s"Finished parsing $logPath") - } - } - /** * Rebuilds the application state store from its event log. */ @@ -741,8 +799,13 @@ private[history] class FsHistoryProvider(conf: SparkConf, clock: Clock) } replayBus.addListener(listener) try { - replay(eventLog, replayBus) + val path = eventLog.getPath() + logInfo(s"Parsing $path to re-build UI...") + Utils.tryWithResource(EventLoggingListener.openEventLog(path, fs)) { in => + replayBus.replay(in, path.toString(), maybeTruncated = !isCompleted(path.toString())) + } trackingStore.close(false) + logInfo(s"Finished parsing $path") } catch { case e: Exception => Utils.tryLogNonFatalError { @@ -881,6 +944,10 @@ private[history] class FsHistoryProvider(conf: SparkConf, clock: Clock) } } + private def isCompleted(name: String): Boolean = { + !name.endsWith(EventLoggingListener.IN_PROGRESS) + } + } private[history] object FsHistoryProvider { @@ -945,11 +1012,17 @@ private[history] class ApplicationInfoWrapper( } -private[history] class AppListingListener(log: FileStatus, clock: Clock) extends SparkListener { +private[history] class AppListingListener( + log: FileStatus, + clock: Clock, + haltEnabled: Boolean) extends SparkListener { private val app = new MutableApplicationInfo() private val attempt = new MutableAttemptInfo(log.getPath().getName(), log.getLen()) + private var gotEnvUpdate = false + private var halted = false + override def onApplicationStart(event: SparkListenerApplicationStart): Unit = { app.id = event.appId.orNull app.name = event.appName @@ -958,6 +1031,8 @@ private[history] class AppListingListener(log: FileStatus, clock: Clock) extends attempt.startTime = new Date(event.time) attempt.lastUpdated = new Date(clock.getTimeMillis()) attempt.sparkUser = event.sparkUser + + checkProgress() } override def onApplicationEnd(event: SparkListenerApplicationEnd): Unit = { @@ -968,11 +1043,18 @@ private[history] class AppListingListener(log: FileStatus, clock: Clock) extends } override def onEnvironmentUpdate(event: SparkListenerEnvironmentUpdate): Unit = { - val allProperties = event.environmentDetails("Spark Properties").toMap - attempt.viewAcls = allProperties.get("spark.ui.view.acls") - attempt.adminAcls = allProperties.get("spark.admin.acls") - attempt.viewAclsGroups = allProperties.get("spark.ui.view.acls.groups") - attempt.adminAclsGroups = allProperties.get("spark.admin.acls.groups") + // Only parse the first env update, since any future changes don't have any effect on + // the ACLs set for the UI. 
+ if (!gotEnvUpdate) { + val allProperties = event.environmentDetails("Spark Properties").toMap + attempt.viewAcls = allProperties.get("spark.ui.view.acls") + attempt.adminAcls = allProperties.get("spark.admin.acls") + attempt.viewAclsGroups = allProperties.get("spark.ui.view.acls.groups") + attempt.adminAclsGroups = allProperties.get("spark.admin.acls.groups") + + gotEnvUpdate = true + checkProgress() + } } override def onOtherEvent(event: SparkListenerEvent): Unit = event match { @@ -989,6 +1071,17 @@ private[history] class AppListingListener(log: FileStatus, clock: Clock) extends } } + /** + * Throws a halt exception to stop replay if enough data to create the app listing has been + * read. + */ + private def checkProgress(): Unit = { + if (haltEnabled && !halted && app.id != null && gotEnvUpdate) { + halted = true + throw new HaltReplayException() + } + } + private class MutableApplicationInfo { var id: String = null var name: String = null diff --git a/core/src/main/scala/org/apache/spark/deploy/history/config.scala b/core/src/main/scala/org/apache/spark/deploy/history/config.scala index efdbf672bb52f..25ba9edb9e014 100644 --- a/core/src/main/scala/org/apache/spark/deploy/history/config.scala +++ b/core/src/main/scala/org/apache/spark/deploy/history/config.scala @@ -49,4 +49,19 @@ private[spark] object config { .intConf .createWithDefault(18080) + val FAST_IN_PROGRESS_PARSING = + ConfigBuilder("spark.history.fs.inProgressOptimization.enabled") + .doc("Enable optimized handling of in-progress logs. This option may leave finished " + + "applications that fail to rename their event logs listed as in-progress.") + .booleanConf + .createWithDefault(true) + + val END_EVENT_REPARSE_CHUNK_SIZE = + ConfigBuilder("spark.history.fs.endEventReparseChunkSize") + .doc("How many bytes to parse at the end of log files looking for the end event. " + + "This is used to speed up generation of application listings by skipping unnecessary " + + "parts of event log files. It can be disabled by setting this config to 0.") + .bytesConf(ByteUnit.BYTE) + .createWithDefaultString("1m") + } diff --git a/core/src/main/scala/org/apache/spark/deploy/worker/DriverWrapper.scala b/core/src/main/scala/org/apache/spark/deploy/worker/DriverWrapper.scala index 3f71237164a15..8d6a2b80ef5f2 100644 --- a/core/src/main/scala/org/apache/spark/deploy/worker/DriverWrapper.scala +++ b/core/src/main/scala/org/apache/spark/deploy/worker/DriverWrapper.scala @@ -25,7 +25,7 @@ import org.apache.spark.{SecurityManager, SparkConf} import org.apache.spark.deploy.{DependencyUtils, SparkHadoopUtil, SparkSubmit} import org.apache.spark.internal.Logging import org.apache.spark.rpc.RpcEnv -import org.apache.spark.util.{ChildFirstURLClassLoader, MutableURLClassLoader, Utils} +import org.apache.spark.util._ /** * Utility object for launching driver programs such that they share fate with the Worker process. 
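Aside (not part of the patch): the DriverWrapper hunk below switches its caller to DependencyUtils.mergeFileLists, which this patch moved out of SparkSubmit (see the first file above). Its behavior, sketched as assertions (mergeFileLists is private[deploy], so these only compile from within that package):

    // Blank and null entries are dropped, the rest are joined with commas,
    // and an all-blank input yields null rather than an empty string.
    assert(DependencyUtils.mergeFileLists("a.jar,b.jar", null, "c.jar") == "a.jar,b.jar,c.jar")
    assert(DependencyUtils.mergeFileLists(null, "") == null)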
@@ -93,7 +93,7 @@ object DriverWrapper extends Logging {
     val jars = {
       val jarsProp = sys.props.get("spark.jars").orNull
       if (!StringUtils.isBlank(resolvedMavenCoordinates)) {
-        SparkSubmit.mergeFileLists(jarsProp, resolvedMavenCoordinates)
+        DependencyUtils.mergeFileLists(jarsProp, resolvedMavenCoordinates)
       } else {
         jarsProp
       }
diff --git a/core/src/main/scala/org/apache/spark/internal/io/HadoopMapReduceCommitProtocol.scala b/core/src/main/scala/org/apache/spark/internal/io/HadoopMapReduceCommitProtocol.scala
index 6d20ef1f98a3c..3e60c50ada59b 100644
--- a/core/src/main/scala/org/apache/spark/internal/io/HadoopMapReduceCommitProtocol.scala
+++ b/core/src/main/scala/org/apache/spark/internal/io/HadoopMapReduceCommitProtocol.scala
@@ -186,7 +186,17 @@ class HadoopMapReduceCommitProtocol(
       logDebug(s"Clean up default partition directories for overwriting: $partitionPaths")
       for (part <- partitionPaths) {
         val finalPartPath = new Path(path, part)
-        fs.delete(finalPartPath, true)
+        if (!fs.delete(finalPartPath, true) && !fs.exists(finalPartPath.getParent)) {
+          // According to the official Hadoop FileSystem API spec, the delete op should assume
+          // the destination is no longer present regardless of its return value, thus we do not
+          // need to double check that finalPartPath exists before the rename.
+          // Also in our case, based on the spec, delete returns false only when finalPartPath
+          // does not exist. When this happens, we need to take action if the parent of
+          // finalPartPath also does not exist (e.g. the scenario described in SPARK-23815),
+          // because the FileSystem API spec on rename says the rename dest (finalPartPath)
+          // must have a parent that exists, otherwise the rename may give an unexpected result.
+          fs.mkdirs(finalPartPath.getParent)
+        }
         fs.rename(new Path(stagingDir, part), finalPartPath)
       }
     }
diff --git a/core/src/main/scala/org/apache/spark/scheduler/AsyncEventQueue.scala b/core/src/main/scala/org/apache/spark/scheduler/AsyncEventQueue.scala
index 7e14938acd8e0..c1fedd63f6a90 100644
--- a/core/src/main/scala/org/apache/spark/scheduler/AsyncEventQueue.scala
+++ b/core/src/main/scala/org/apache/spark/scheduler/AsyncEventQueue.scala
@@ -166,7 +166,7 @@ private class AsyncEventQueue(val name: String, conf: SparkConf, metrics: LiveLi
       val prevLastReportTimestamp = lastReportTimestamp
       lastReportTimestamp = System.currentTimeMillis()
       val previous = new java.util.Date(prevLastReportTimestamp)
-      logWarning(s"Dropped $droppedEvents events from $name since $previous.")
+      logWarning(s"Dropped $droppedCount events from $name since $previous.")
     }
   }
 }
diff --git a/core/src/main/scala/org/apache/spark/scheduler/ReplayListenerBus.scala b/core/src/main/scala/org/apache/spark/scheduler/ReplayListenerBus.scala
index c9cd662f5709d..226c23733c870 100644
--- a/core/src/main/scala/org/apache/spark/scheduler/ReplayListenerBus.scala
+++ b/core/src/main/scala/org/apache/spark/scheduler/ReplayListenerBus.scala
@@ -115,6 +115,8 @@ private[spark] class ReplayListenerBus extends SparkListenerBus with Logging {
         }
       }
     } catch {
+      case e: HaltReplayException =>
+        // Just stop replay.
       case _: EOFException if maybeTruncated =>
       case ioe: IOException =>
         throw ioe
@@ -124,8 +126,17 @@ private[spark] class ReplayListenerBus extends SparkListenerBus with Logging {
     }
   }
 
+  override protected def isIgnorableException(e: Throwable): Boolean = {
+    e.isInstanceOf[HaltReplayException]
+  }
+
 }
 
+/**
+ * Exception that can be thrown by listeners to halt replay.
This is handled by ReplayListenerBus + * only, and will cause errors if thrown when using other bus implementations. + */ +private[spark] class HaltReplayException extends RuntimeException private[spark] object ReplayListenerBus { diff --git a/core/src/main/scala/org/apache/spark/util/CommandLineUtils.scala b/core/src/main/scala/org/apache/spark/util/CommandLineUtils.scala index d73901686b705..4b6602b50aa1c 100644 --- a/core/src/main/scala/org/apache/spark/util/CommandLineUtils.scala +++ b/core/src/main/scala/org/apache/spark/util/CommandLineUtils.scala @@ -33,24 +33,14 @@ private[spark] trait CommandLineUtils { private[spark] var printStream: PrintStream = System.err // scalastyle:off println - - private[spark] def printWarning(str: String): Unit = printStream.println("Warning: " + str) + private[spark] def printMessage(str: String): Unit = printStream.println(str) + // scalastyle:on println private[spark] def printErrorAndExit(str: String): Unit = { - printStream.println("Error: " + str) - printStream.println("Run with --help for usage help or --verbose for debug output") + printMessage("Error: " + str) + printMessage("Run with --help for usage help or --verbose for debug output") exitFn(1) } - // scalastyle:on println - - private[spark] def parseSparkConfProperty(pair: String): (String, String) = { - pair.split("=", 2).toSeq match { - case Seq(k, v) => (k, v) - case _ => printErrorAndExit(s"Spark config without '=': $pair") - throw new SparkException(s"Spark config without '=': $pair") - } - } - def main(args: Array[String]): Unit } diff --git a/core/src/main/scala/org/apache/spark/util/ListenerBus.scala b/core/src/main/scala/org/apache/spark/util/ListenerBus.scala index 76a56298aaebc..b25a731401f23 100644 --- a/core/src/main/scala/org/apache/spark/util/ListenerBus.scala +++ b/core/src/main/scala/org/apache/spark/util/ListenerBus.scala @@ -81,7 +81,7 @@ private[spark] trait ListenerBus[L <: AnyRef, E] extends Logging { try { doPostEvent(listener, event) } catch { - case NonFatal(e) => + case NonFatal(e) if !isIgnorableException(e) => logError(s"Listener ${Utils.getFormattedClassName(listener)} threw an exception", e) } finally { if (maybeTimerContext != null) { @@ -97,6 +97,9 @@ private[spark] trait ListenerBus[L <: AnyRef, E] extends Logging { */ protected def doPostEvent(listener: L, event: E): Unit + /** Allows bus implementations to prevent error logging for certain exceptions. 
diff --git a/core/src/main/scala/org/apache/spark/util/CommandLineUtils.scala b/core/src/main/scala/org/apache/spark/util/CommandLineUtils.scala index d73901686b705..4b6602b50aa1c 100644 --- a/core/src/main/scala/org/apache/spark/util/CommandLineUtils.scala +++ b/core/src/main/scala/org/apache/spark/util/CommandLineUtils.scala @@ -33,24 +33,14 @@ private[spark] trait CommandLineUtils { private[spark] var printStream: PrintStream = System.err // scalastyle:off println - - private[spark] def printWarning(str: String): Unit = printStream.println("Warning: " + str) + private[spark] def printMessage(str: String): Unit = printStream.println(str) + // scalastyle:on println private[spark] def printErrorAndExit(str: String): Unit = { - printStream.println("Error: " + str) - printStream.println("Run with --help for usage help or --verbose for debug output") + printMessage("Error: " + str) + printMessage("Run with --help for usage help or --verbose for debug output") exitFn(1) } - // scalastyle:on println - - private[spark] def parseSparkConfProperty(pair: String): (String, String) = { - pair.split("=", 2).toSeq match { - case Seq(k, v) => (k, v) - case _ => printErrorAndExit(s"Spark config without '=': $pair") - throw new SparkException(s"Spark config without '=': $pair") - } - } - def main(args: Array[String]): Unit } diff --git a/core/src/main/scala/org/apache/spark/util/ListenerBus.scala b/core/src/main/scala/org/apache/spark/util/ListenerBus.scala index 76a56298aaebc..b25a731401f23 100644 --- a/core/src/main/scala/org/apache/spark/util/ListenerBus.scala +++ b/core/src/main/scala/org/apache/spark/util/ListenerBus.scala @@ -81,7 +81,7 @@ private[spark] trait ListenerBus[L <: AnyRef, E] extends Logging { try { doPostEvent(listener, event) } catch { - case NonFatal(e) => + case NonFatal(e) if !isIgnorableException(e) => logError(s"Listener ${Utils.getFormattedClassName(listener)} threw an exception", e) } finally { if (maybeTimerContext != null) { @@ -97,6 +97,9 @@ private[spark] trait ListenerBus[L <: AnyRef, E] extends Logging { */ protected def doPostEvent(listener: L, event: E): Unit + /** Allows bus implementations to prevent error logging for certain exceptions. */ + protected def isIgnorableException(e: Throwable): Boolean = false + private[spark] def findListenersByClass[T <: L : ClassTag](): Seq[T] = { val c = implicitly[ClassTag[T]].runtimeClass listeners.asScala.filter(_.getClass == c).map(_.asInstanceOf[T]).toSeq diff --git a/core/src/main/scala/org/apache/spark/util/collection/Spillable.scala b/core/src/main/scala/org/apache/spark/util/collection/Spillable.scala index 8183f825592c0..81457b53cd814 100644 --- a/core/src/main/scala/org/apache/spark/util/collection/Spillable.scala +++ b/core/src/main/scala/org/apache/spark/util/collection/Spillable.scala @@ -19,6 +19,7 @@ package org.apache.spark.util.collection import org.apache.spark.SparkEnv import org.apache.spark.internal.Logging +import org.apache.spark.internal.config._ import org.apache.spark.memory.{MemoryConsumer, MemoryMode, TaskMemoryManager} /** @@ -41,7 +42,7 @@ private[spark] abstract class Spillable[C](taskMemoryManager: TaskMemoryManager) protected def forceSpill(): Boolean // Number of elements read from input since last spill - protected def elementsRead: Long = _elementsRead + protected def elementsRead: Int = _elementsRead // Called by subclasses every time a record is read // It's used for checking spilling frequency @@ -54,15 +55,15 @@ private[spark] abstract class Spillable[C](taskMemoryManager: TaskMemoryManager) // Force this collection to spill when there are this many elements in memory // For testing only - private[this] val numElementsForceSpillThreshold: Long = - SparkEnv.get.conf.getLong("spark.shuffle.spill.numElementsForceSpillThreshold", Long.MaxValue) + private[this] val numElementsForceSpillThreshold: Int = + SparkEnv.get.conf.get(SHUFFLE_SPILL_NUM_ELEMENTS_FORCE_SPILL_THRESHOLD) // Threshold for this collection's size in bytes before we start tracking its memory usage // To avoid a large number of small spills, initialize this to a value orders of magnitude > 0 @volatile private[this] var myMemoryThreshold = initialMemoryThreshold // Number of elements read from input since last spill - private[this] var _elementsRead = 0L + private[this] var _elementsRead = 0 // Number of bytes spilled in total @volatile private[this] var _memoryBytesSpilled = 0L diff --git a/core/src/test/java/org/apache/spark/launcher/SparkLauncherSuite.java b/core/src/test/java/org/apache/spark/launcher/SparkLauncherSuite.java index 2225591a4ff75..6a1a38c1a54f4 100644 --- a/core/src/test/java/org/apache/spark/launcher/SparkLauncherSuite.java +++ b/core/src/test/java/org/apache/spark/launcher/SparkLauncherSuite.java @@ -109,7 +109,7 @@ public void testChildProcLauncher() throws Exception { .addSparkArg(opts.CONF, String.format("%s=-Dfoo=ShouldBeOverriddenBelow", SparkLauncher.DRIVER_EXTRA_JAVA_OPTIONS)) .setConf(SparkLauncher.DRIVER_EXTRA_JAVA_OPTIONS, - "-Dfoo=bar -Dtest.appender=childproc") + "-Dfoo=bar -Dtest.appender=console") .setConf(SparkLauncher.DRIVER_EXTRA_CLASSPATH, System.getProperty("java.class.path")) .addSparkArg(opts.CLASS, "ShouldBeOverriddenBelow") .setMainClass(SparkLauncherTestApp.class.getName()) @@ -192,6 +192,41 @@ private void inProcessLauncherTestImpl() throws Exception { } } + @Test + public void testInProcessLauncherDoesNotKillJvm() throws Exception { + SparkSubmitOptionParser opts = new SparkSubmitOptionParser(); + List<String[]> wrongArgs = Arrays.asList( + new String[] { "--unknown" }, + new String[] { opts.DEPLOY_MODE, "invalid" }); + + for (String[] args : wrongArgs) { + InProcessLauncher launcher = new InProcessLauncher() +
.setAppResource(SparkLauncher.NO_RESOURCE); + switch (args.length) { + case 2: + launcher.addSparkArg(args[0], args[1]); + break; + + case 1: + launcher.addSparkArg(args[0]); + break; + + default: + fail("FIXME: invalid test."); + } + + SparkAppHandle handle = launcher.startApplication(); + waitFor(handle); + assertEquals(SparkAppHandle.State.FAILED, handle.getState()); + } + + // Run --version, which is useless as a use case, but should succeed and not exit the JVM. + // The expected state is "LOST" since "--version" doesn't report state back to the handle. + SparkAppHandle handle = new InProcessLauncher().addSparkArg(opts.VERSION).startApplication(); + waitFor(handle); + assertEquals(SparkAppHandle.State.LOST, handle.getState()); + } + public static class SparkLauncherTestApp { public static void main(String[] args) throws Exception { diff --git a/core/src/test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala b/core/src/test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala index 0d7c342a5eacd..7451e07b25a1f 100644 --- a/core/src/test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala +++ b/core/src/test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala @@ -42,6 +42,7 @@ import org.apache.spark.deploy.SparkSubmit._ import org.apache.spark.deploy.SparkSubmitUtils.MavenCoordinate import org.apache.spark.internal.Logging import org.apache.spark.internal.config._ +import org.apache.spark.launcher.SparkLauncher import org.apache.spark.scheduler.EventLoggingListener import org.apache.spark.util.{CommandLineUtils, ResetSystemProperties, Utils} @@ -109,6 +110,8 @@ class SparkSubmitSuite private val emptyIvySettings = File.createTempFile("ivy", ".xml") FileUtils.write(emptyIvySettings, "", StandardCharsets.UTF_8) + private val submit = new SparkSubmit() + override def beforeEach() { super.beforeEach() } @@ -128,13 +131,16 @@ class SparkSubmitSuite } test("handle binary specified but not class") { - testPrematureExit(Array("foo.jar"), "No main class") + val jar = TestUtils.createJarWithClasses(Seq("SparkSubmitClassA")) + testPrematureExit(Array(jar.toString()), "No main class") } test("handles arguments with --key=val") { val clArgs = Seq( "--jars=one.jar,two.jar,three.jar", - "--name=myApp") + "--name=myApp", + "--class=org.FooBar", + SparkLauncher.NO_RESOURCE) val appArgs = new SparkSubmitArguments(clArgs) appArgs.jars should include regex (".*one.jar,.*two.jar,.*three.jar") appArgs.name should be ("myApp") @@ -182,7 +188,7 @@ class SparkSubmitSuite "thejar.jar" ) val appArgs = new SparkSubmitArguments(clArgs) - val (_, _, conf, _) = prepareSubmitEnvironment(appArgs) + val (_, _, conf, _) = submit.prepareSubmitEnvironment(appArgs) appArgs.deployMode should be ("client") conf.get("spark.submit.deployMode") should be ("client") @@ -192,11 +198,11 @@ class SparkSubmitSuite "--master", "yarn", "--deploy-mode", "cluster", "--conf", "spark.submit.deployMode=client", - "-class", "org.SomeClass", + "--class", "org.SomeClass", "thejar.jar" ) val appArgs1 = new SparkSubmitArguments(clArgs1) - val (_, _, conf1, _) = prepareSubmitEnvironment(appArgs1) + val (_, _, conf1, _) = submit.prepareSubmitEnvironment(appArgs1) appArgs1.deployMode should be ("cluster") conf1.get("spark.submit.deployMode") should be ("cluster") @@ -210,7 +216,7 @@ class SparkSubmitSuite val appArgs2 = new SparkSubmitArguments(clArgs2) appArgs2.deployMode should be (null) - val (_, _, conf2, _) = prepareSubmitEnvironment(appArgs2) + val (_, _, conf2, _) = submit.prepareSubmitEnvironment(appArgs2) appArgs2.deployMode should 
be ("client") conf2.get("spark.submit.deployMode") should be ("client") } @@ -233,7 +239,7 @@ class SparkSubmitSuite "thejar.jar", "arg1", "arg2") val appArgs = new SparkSubmitArguments(clArgs) - val (childArgs, classpath, conf, mainClass) = prepareSubmitEnvironment(appArgs) + val (childArgs, classpath, conf, mainClass) = submit.prepareSubmitEnvironment(appArgs) val childArgsStr = childArgs.mkString(" ") childArgsStr should include ("--class org.SomeClass") childArgsStr should include ("--arg arg1 --arg arg2") @@ -276,7 +282,7 @@ class SparkSubmitSuite "thejar.jar", "arg1", "arg2") val appArgs = new SparkSubmitArguments(clArgs) - val (childArgs, classpath, conf, mainClass) = prepareSubmitEnvironment(appArgs) + val (childArgs, classpath, conf, mainClass) = submit.prepareSubmitEnvironment(appArgs) childArgs.mkString(" ") should be ("arg1 arg2") mainClass should be ("org.SomeClass") classpath should have length (4) @@ -322,7 +328,7 @@ class SparkSubmitSuite "arg1", "arg2") val appArgs = new SparkSubmitArguments(clArgs) appArgs.useRest = useRest - val (childArgs, classpath, conf, mainClass) = prepareSubmitEnvironment(appArgs) + val (childArgs, classpath, conf, mainClass) = submit.prepareSubmitEnvironment(appArgs) val childArgsStr = childArgs.mkString(" ") if (useRest) { childArgsStr should endWith ("thejar.jar org.SomeClass arg1 arg2") @@ -359,7 +365,7 @@ class SparkSubmitSuite "thejar.jar", "arg1", "arg2") val appArgs = new SparkSubmitArguments(clArgs) - val (childArgs, classpath, conf, mainClass) = prepareSubmitEnvironment(appArgs) + val (childArgs, classpath, conf, mainClass) = submit.prepareSubmitEnvironment(appArgs) childArgs.mkString(" ") should be ("arg1 arg2") mainClass should be ("org.SomeClass") classpath should have length (1) @@ -381,7 +387,7 @@ class SparkSubmitSuite "thejar.jar", "arg1", "arg2") val appArgs = new SparkSubmitArguments(clArgs) - val (childArgs, classpath, conf, mainClass) = prepareSubmitEnvironment(appArgs) + val (childArgs, classpath, conf, mainClass) = submit.prepareSubmitEnvironment(appArgs) childArgs.mkString(" ") should be ("arg1 arg2") mainClass should be ("org.SomeClass") classpath should have length (1) @@ -403,7 +409,7 @@ class SparkSubmitSuite "/home/thejar.jar", "arg1") val appArgs = new SparkSubmitArguments(clArgs) - val (childArgs, classpath, conf, mainClass) = prepareSubmitEnvironment(appArgs) + val (childArgs, classpath, conf, mainClass) = submit.prepareSubmitEnvironment(appArgs) val childArgsMap = childArgs.grouped(2).map(a => a(0) -> a(1)).toMap childArgsMap.get("--primary-java-resource") should be (Some("file:/home/thejar.jar")) @@ -428,7 +434,7 @@ class SparkSubmitSuite "thejar.jar", "arg1", "arg2") val appArgs = new SparkSubmitArguments(clArgs) - val (_, _, conf, mainClass) = prepareSubmitEnvironment(appArgs) + val (_, _, conf, mainClass) = submit.prepareSubmitEnvironment(appArgs) conf.get("spark.executor.memory") should be ("5g") conf.get("spark.master") should be ("yarn") conf.get("spark.submit.deployMode") should be ("cluster") @@ -441,12 +447,12 @@ class SparkSubmitSuite val clArgs1 = Seq("--class", "org.apache.spark.repl.Main", "spark-shell") val appArgs1 = new SparkSubmitArguments(clArgs1) - val (_, _, conf1, _) = prepareSubmitEnvironment(appArgs1) + val (_, _, conf1, _) = submit.prepareSubmitEnvironment(appArgs1) conf1.get(UI_SHOW_CONSOLE_PROGRESS) should be (true) val clArgs2 = Seq("--class", "org.SomeClass", "thejar.jar") val appArgs2 = new SparkSubmitArguments(clArgs2) - val (_, _, conf2, _) = prepareSubmitEnvironment(appArgs2) + val 
(_, _, conf2, _) = submit.prepareSubmitEnvironment(appArgs2) assert(!conf2.contains(UI_SHOW_CONSOLE_PROGRESS)) } @@ -625,7 +631,7 @@ class SparkSubmitSuite "--files", files, "thejar.jar") val appArgs = new SparkSubmitArguments(clArgs) - val (_, _, conf, _) = SparkSubmit.prepareSubmitEnvironment(appArgs) + val (_, _, conf, _) = submit.prepareSubmitEnvironment(appArgs) appArgs.jars should be (Utils.resolveURIs(jars)) appArgs.files should be (Utils.resolveURIs(files)) conf.get("spark.jars") should be (Utils.resolveURIs(jars + ",thejar.jar")) @@ -640,7 +646,7 @@ class SparkSubmitSuite "thejar.jar" ) val appArgs2 = new SparkSubmitArguments(clArgs2) - val (_, _, conf2, _) = SparkSubmit.prepareSubmitEnvironment(appArgs2) + val (_, _, conf2, _) = submit.prepareSubmitEnvironment(appArgs2) appArgs2.files should be (Utils.resolveURIs(files)) appArgs2.archives should fullyMatch regex ("file:/archive1,file:.*#archive3") conf2.get("spark.yarn.dist.files") should be (Utils.resolveURIs(files)) @@ -656,7 +662,7 @@ class SparkSubmitSuite "mister.py" ) val appArgs3 = new SparkSubmitArguments(clArgs3) - val (_, _, conf3, _) = SparkSubmit.prepareSubmitEnvironment(appArgs3) + val (_, _, conf3, _) = submit.prepareSubmitEnvironment(appArgs3) appArgs3.pyFiles should be (Utils.resolveURIs(pyFiles)) conf3.get("spark.submit.pyFiles") should be ( PythonRunner.formatPaths(Utils.resolveURIs(pyFiles)).mkString(",")) @@ -708,7 +714,7 @@ class SparkSubmitSuite "thejar.jar" ) val appArgs = new SparkSubmitArguments(clArgs) - val (_, _, conf, _) = SparkSubmit.prepareSubmitEnvironment(appArgs) + val (_, _, conf, _) = submit.prepareSubmitEnvironment(appArgs) conf.get("spark.jars") should be(Utils.resolveURIs(jars + ",thejar.jar")) conf.get("spark.files") should be(Utils.resolveURIs(files)) @@ -725,7 +731,7 @@ class SparkSubmitSuite "thejar.jar" ) val appArgs2 = new SparkSubmitArguments(clArgs2) - val (_, _, conf2, _) = SparkSubmit.prepareSubmitEnvironment(appArgs2) + val (_, _, conf2, _) = submit.prepareSubmitEnvironment(appArgs2) conf2.get("spark.yarn.dist.files") should be(Utils.resolveURIs(files)) conf2.get("spark.yarn.dist.archives") should be(Utils.resolveURIs(archives)) @@ -740,7 +746,7 @@ class SparkSubmitSuite "mister.py" ) val appArgs3 = new SparkSubmitArguments(clArgs3) - val (_, _, conf3, _) = SparkSubmit.prepareSubmitEnvironment(appArgs3) + val (_, _, conf3, _) = submit.prepareSubmitEnvironment(appArgs3) conf3.get("spark.submit.pyFiles") should be( PythonRunner.formatPaths(Utils.resolveURIs(pyFiles)).mkString(",")) @@ -757,7 +763,7 @@ class SparkSubmitSuite "hdfs:///tmp/mister.py" ) val appArgs4 = new SparkSubmitArguments(clArgs4) - val (_, _, conf4, _) = SparkSubmit.prepareSubmitEnvironment(appArgs4) + val (_, _, conf4, _) = submit.prepareSubmitEnvironment(appArgs4) // Should not format python path for yarn cluster mode conf4.get("spark.submit.pyFiles") should be(Utils.resolveURIs(remotePyFiles)) } @@ -778,17 +784,17 @@ class SparkSubmitSuite } test("SPARK_CONF_DIR overrides spark-defaults.conf") { - forConfDir(Map("spark.executor.memory" -> "2.3g")) { path => + forConfDir(Map("spark.executor.memory" -> "3g")) { path => val unusedJar = TestUtils.createJarWithClasses(Seq.empty) val args = Seq( "--class", SimpleApplicationTest.getClass.getName.stripSuffix("$"), "--name", "testApp", "--master", "local", unusedJar.toString) - val appArgs = new SparkSubmitArguments(args, Map("SPARK_CONF_DIR" -> path)) + val appArgs = new SparkSubmitArguments(args, env = Map("SPARK_CONF_DIR" -> path)) assert(appArgs.propertiesFile != 
null) assert(appArgs.propertiesFile.startsWith(path)) - appArgs.executorMemory should be ("2.3g") + appArgs.executorMemory should be ("3g") } } @@ -809,6 +815,9 @@ class SparkSubmitSuite val archive1 = File.createTempFile("archive1", ".zip", tmpArchiveDir) val archive2 = File.createTempFile("archive2", ".zip", tmpArchiveDir) + val tempPyFile = File.createTempFile("tmpApp", ".py") + tempPyFile.deleteOnExit() + val args = Seq( "--class", UserClasspathFirstTest.getClass.getName.stripPrefix("$"), "--name", "testApp", @@ -818,10 +827,10 @@ class SparkSubmitSuite "--files", s"${tmpFileDir.getAbsolutePath}/tmpFile*", "--py-files", s"${tmpPyFileDir.getAbsolutePath}/tmpPy*", "--archives", s"${tmpArchiveDir.getAbsolutePath}/*.zip", - jar2.toString) + tempPyFile.toURI().toString()) val appArgs = new SparkSubmitArguments(args) - val (_, _, conf, _) = SparkSubmit.prepareSubmitEnvironment(appArgs) + val (_, _, conf, _) = submit.prepareSubmitEnvironment(appArgs) conf.get("spark.yarn.dist.jars").split(",").toSet should be (Set(jar1.toURI.toString, jar2.toURI.toString)) conf.get("spark.yarn.dist.files").split(",").toSet should be @@ -947,7 +956,7 @@ class SparkSubmitSuite ) val appArgs = new SparkSubmitArguments(args) - val (_, _, conf, _) = SparkSubmit.prepareSubmitEnvironment(appArgs, Some(hadoopConf)) + val (_, _, conf, _) = submit.prepareSubmitEnvironment(appArgs, conf = Some(hadoopConf)) // All the resources should still be remote paths, so that YARN client will not upload again. conf.get("spark.yarn.dist.jars") should be (tmpJarPath) @@ -1007,7 +1016,7 @@ class SparkSubmitSuite ) ++ forceDownloadArgs ++ Seq(s"s3a://$mainResource") val appArgs = new SparkSubmitArguments(args) - val (_, _, conf, _) = SparkSubmit.prepareSubmitEnvironment(appArgs, Some(hadoopConf)) + val (_, _, conf, _) = submit.prepareSubmitEnvironment(appArgs, conf = Some(hadoopConf)) val jars = conf.get("spark.yarn.dist.jars").split(",").toSet @@ -1058,7 +1067,7 @@ class SparkSubmitSuite "hello") val exception = intercept[SparkException] { - SparkSubmit.main(args) + submit.doSubmit(args) } assert(exception.getMessage() === "hello") diff --git a/core/src/test/scala/org/apache/spark/deploy/history/FsHistoryProviderSuite.scala b/core/src/test/scala/org/apache/spark/deploy/history/FsHistoryProviderSuite.scala index 0ba57bf4563c1..77b239489d489 100644 --- a/core/src/test/scala/org/apache/spark/deploy/history/FsHistoryProviderSuite.scala +++ b/core/src/test/scala/org/apache/spark/deploy/history/FsHistoryProviderSuite.scala @@ -31,7 +31,7 @@ import org.apache.hadoop.fs.{FileStatus, Path} import org.apache.hadoop.hdfs.DistributedFileSystem import org.json4s.jackson.JsonMethods._ import org.mockito.Matchers.any -import org.mockito.Mockito.{doReturn, mock, spy, verify} +import org.mockito.Mockito.{mock, spy, verify} import org.scalatest.BeforeAndAfter import org.scalatest.Matchers import org.scalatest.concurrent.Eventually._ @@ -151,8 +151,9 @@ class FsHistoryProviderSuite extends SparkFunSuite with BeforeAndAfter with Matc var mergeApplicationListingCall = 0 override protected def mergeApplicationListing( fileStatus: FileStatus, - lastSeen: Long): Unit = { - super.mergeApplicationListing(fileStatus, lastSeen) + lastSeen: Long, + enableSkipToEnd: Boolean): Unit = { + super.mergeApplicationListing(fileStatus, lastSeen, enableSkipToEnd) mergeApplicationListingCall += 1 } } @@ -256,14 +257,13 @@ class FsHistoryProviderSuite extends SparkFunSuite with BeforeAndAfter with Matc ) updateAndCheck(provider) { list => - list should not be (null) 
list.size should be (1) list.head.attempts.size should be (3) list.head.attempts.head.attemptId should be (Some("attempt3")) } val app2Attempt1 = newLogFile("app2", Some("attempt1"), inProgress = false) - writeFile(attempt1, true, None, + writeFile(app2Attempt1, true, None, SparkListenerApplicationStart("app2", Some("app2"), 5L, "test", Some("attempt1")), SparkListenerApplicationEnd(6L) ) @@ -649,8 +649,7 @@ class FsHistoryProviderSuite extends SparkFunSuite with BeforeAndAfter with Matc // Add more info to the app log, and trigger the provider to update things. writeFile(appLog, true, None, SparkListenerApplicationStart(appId, Some(appId), 1L, "test", None), - SparkListenerJobStart(0, 1L, Nil, null), - SparkListenerApplicationEnd(5L) + SparkListenerJobStart(0, 1L, Nil, null) ) provider.checkForLogs() @@ -668,11 +667,12 @@ class FsHistoryProviderSuite extends SparkFunSuite with BeforeAndAfter with Matc test("clean up stale app information") { val storeDir = Utils.createTempDir() val conf = createTestConf().set(LOCAL_STORE_DIR, storeDir.getAbsolutePath()) - val provider = spy(new FsHistoryProvider(conf)) + val clock = new ManualClock() + val provider = spy(new FsHistoryProvider(conf, clock)) val appId = "new1" // Write logs for two app attempts. - doReturn(1L).when(provider).getNewLastScanTime() + clock.advance(1) val attempt1 = newLogFile(appId, Some("1"), inProgress = false) writeFile(attempt1, true, None, SparkListenerApplicationStart(appId, Some(appId), 1L, "test", Some("1")), @@ -697,7 +697,7 @@ class FsHistoryProviderSuite extends SparkFunSuite with BeforeAndAfter with Matc // Delete the underlying log file for attempt 1 and rescan. The UI should go away, but since // attempt 2 still exists, listing data should be there. - doReturn(2L).when(provider).getNewLastScanTime() + clock.advance(1) attempt1.delete() updateAndCheck(provider) { list => assert(list.size === 1) @@ -708,7 +708,7 @@ class FsHistoryProviderSuite extends SparkFunSuite with BeforeAndAfter with Matc assert(provider.getAppUI(appId, None) === None) // Delete the second attempt's log file. Now everything should go away. - doReturn(3L).when(provider).getNewLastScanTime() + clock.advance(1) attempt2.delete() updateAndCheck(provider) { list => assert(list.isEmpty) @@ -718,9 +718,7 @@ class FsHistoryProviderSuite extends SparkFunSuite with BeforeAndAfter with Matc test("SPARK-21571: clean up removes invalid history files") { val clock = new ManualClock() val conf = createTestConf().set(MAX_LOG_AGE_S.key, s"2d") - val provider = new FsHistoryProvider(conf, clock) { - override def getNewLastScanTime(): Long = clock.getTimeMillis() - } + val provider = new FsHistoryProvider(conf, clock) // Create 0-byte size inprogress and complete files var logCount = 0 @@ -772,6 +770,54 @@ class FsHistoryProviderSuite extends SparkFunSuite with BeforeAndAfter with Matc assert(new File(testDir.toURI).listFiles().size === validLogCount) } + test("always find end event for finished apps") { + // Create a log file where the end event is before the configured chunk to be reparsed at + // the end of the file. The correct listing should still be generated.
+ val log = newLogFile("end-event-test", None, inProgress = false) + writeFile(log, true, None, + Seq( + SparkListenerApplicationStart("end-event-test", Some("end-event-test"), 1L, "test", None), + SparkListenerEnvironmentUpdate(Map( + "Spark Properties" -> Seq.empty, + "JVM Information" -> Seq.empty, + "System Properties" -> Seq.empty, + "Classpath Entries" -> Seq.empty + )), + SparkListenerApplicationEnd(5L) + ) ++ (1 to 1000).map { i => SparkListenerJobStart(i, i, Nil) }: _*) + + val conf = createTestConf().set(END_EVENT_REPARSE_CHUNK_SIZE.key, s"1k") + val provider = new FsHistoryProvider(conf) + updateAndCheck(provider) { list => + assert(list.size === 1) + assert(list(0).attempts.size === 1) + assert(list(0).attempts(0).completed) + } + } + + test("parse event logs with optimizations off") { + val conf = createTestConf() + .set(END_EVENT_REPARSE_CHUNK_SIZE, 0L) + .set(FAST_IN_PROGRESS_PARSING, false) + val provider = new FsHistoryProvider(conf) + + val complete = newLogFile("complete", None, inProgress = false) + writeFile(complete, true, None, + SparkListenerApplicationStart("complete", Some("complete"), 1L, "test", None), + SparkListenerApplicationEnd(5L) + ) + + val incomplete = newLogFile("incomplete", None, inProgress = true) + writeFile(incomplete, true, None, + SparkListenerApplicationStart("incomplete", Some("incomplete"), 1L, "test", None) + ) + + updateAndCheck(provider) { list => + list.size should be (2) + list.count(_.attempts.head.completed) should be (1) + } + } + /** * Asks the provider to check for logs and calls a function to perform checks on the updated * app list. Example: @@ -815,7 +861,8 @@ class FsHistoryProviderSuite extends SparkFunSuite with BeforeAndAfter with Matc private def createTestConf(inMemory: Boolean = false): SparkConf = { val conf = new SparkConf() - .set("spark.history.fs.logDirectory", testDir.getAbsolutePath()) + .set(EVENT_LOG_DIR, testDir.getAbsolutePath()) + .set(FAST_IN_PROGRESS_PARSING, true) if (!inMemory) { conf.set(LOCAL_STORE_DIR, Utils.createTempDir().getAbsolutePath()) @@ -848,4 +895,3 @@ class TestGroupsMappingProvider extends GroupMappingServiceProvider { mappings.get(username).map(Set(_)).getOrElse(Set.empty) } } - diff --git a/core/src/test/scala/org/apache/spark/deploy/rest/StandaloneRestSubmitSuite.scala b/core/src/test/scala/org/apache/spark/deploy/rest/StandaloneRestSubmitSuite.scala index e505bc018857d..54c168a8218f3 100644 --- a/core/src/test/scala/org/apache/spark/deploy/rest/StandaloneRestSubmitSuite.scala +++ b/core/src/test/scala/org/apache/spark/deploy/rest/StandaloneRestSubmitSuite.scala @@ -445,7 +445,7 @@ class StandaloneRestSubmitSuite extends SparkFunSuite with BeforeAndAfterEach { "--class", mainClass, mainJar) ++ appArgs val args = new SparkSubmitArguments(commandLineArgs) - val (_, _, sparkConf, _) = SparkSubmit.prepareSubmitEnvironment(args) + val (_, _, sparkConf, _) = new SparkSubmit().prepareSubmitEnvironment(args) new RestSubmissionClient("spark://host:port").constructSubmitRequest( mainJar, mainClass, appArgs, sparkConf.getAll.toMap, Map.empty) } diff --git a/docs/README.md b/docs/README.md index 9eac4ba35c458..dbea4d64c4298 100644 --- a/docs/README.md +++ b/docs/README.md @@ -22,10 +22,13 @@ $ sudo gem install jekyll jekyll-redirect-from pygments.rb $ sudo pip install Pygments # Following is needed only for generating API docs $ sudo pip install sphinx pypandoc mkdocs -$ sudo Rscript -e 'install.packages(c("knitr", "devtools", "roxygen2", "testthat", "rmarkdown"), repos="http://cran.stat.ucla.edu/")' 
+$ sudo Rscript -e 'install.packages(c("knitr", "devtools", "testthat", "rmarkdown"), repos="http://cran.stat.ucla.edu/")' +$ sudo Rscript -e 'devtools::install_version("roxygen2", version = "5.0.1", repos="http://cran.stat.ucla.edu/")' ``` -(Note: If you are on a system with both Ruby 1.9 and Ruby 2.0 you may need to replace gem with gem2.0) +Note: If you are on a system with both Ruby 1.9 and Ruby 2.0 you may need to replace gem with gem2.0. + +Note: Other versions of roxygen2 might work for generating the SparkR documentation, but the `RoxygenNote` field in `$SPARK_HOME/R/pkg/DESCRIPTION` is 5.0.1, and roxygen2 updates that field if a mismatched version is used. ## Generating the Documentation HTML @@ -62,12 +65,12 @@ $ PRODUCTION=1 jekyll build ## API Docs (Scaladoc, Javadoc, Sphinx, roxygen2, MkDocs) -You can build just the Spark scaladoc and javadoc by running `build/sbt unidoc` from the `SPARK_HOME` directory. +You can build just the Spark scaladoc and javadoc by running `build/sbt unidoc` from the `$SPARK_HOME` directory. Similarly, you can build just the PySpark docs by running `make html` from the -`SPARK_HOME/python/docs` directory. Documentation is only generated for classes that are listed as -public in `__init__.py`. The SparkR docs can be built by running `SPARK_HOME/R/create-docs.sh`, and -the SQL docs can be built by running `SPARK_HOME/sql/create-docs.sh` +`$SPARK_HOME/python/docs` directory. Documentation is only generated for classes that are listed as +public in `__init__.py`. The SparkR docs can be built by running `$SPARK_HOME/R/create-docs.sh`, and +the SQL docs can be built by running `$SPARK_HOME/sql/create-docs.sh` after [building Spark](https://github.com/apache/spark#building-spark) first. When you run `jekyll build` in the `docs` directory, it will also copy over the scaladoc and javadoc for the various diff --git a/launcher/src/main/java/org/apache/spark/launcher/AbstractLauncher.java b/launcher/src/main/java/org/apache/spark/launcher/AbstractLauncher.java index 44e69fc45dffa..4e02843480e8f 100644 --- a/launcher/src/main/java/org/apache/spark/launcher/AbstractLauncher.java +++ b/launcher/src/main/java/org/apache/spark/launcher/AbstractLauncher.java @@ -139,7 +139,7 @@ public T setMainClass(String mainClass) { public T addSparkArg(String arg) { SparkSubmitOptionParser validator = new ArgumentValidator(false); validator.parse(Arrays.asList(arg)); - builder.sparkArgs.add(arg); + builder.userArgs.add(arg); return self(); } @@ -187,8 +187,8 @@ public T addSparkArg(String name, String value) { } } else { validator.parse(Arrays.asList(name, value)); - builder.sparkArgs.add(name); - builder.sparkArgs.add(value); + builder.userArgs.add(name); + builder.userArgs.add(value); } return self(); } diff --git a/launcher/src/main/java/org/apache/spark/launcher/InProcessLauncher.java b/launcher/src/main/java/org/apache/spark/launcher/InProcessLauncher.java index 6d726b4a69a86..688e1f763c205 100644 --- a/launcher/src/main/java/org/apache/spark/launcher/InProcessLauncher.java +++ b/launcher/src/main/java/org/apache/spark/launcher/InProcessLauncher.java @@ -89,10 +89,18 @@ Method findSparkSubmit() throws IOException { } Class<?> sparkSubmit; + // SPARK-22941: first try the new SparkSubmit interface that has better error handling, + // but fall back to the old interface in case someone is mixing & matching launcher and + // Spark versions.
try { - sparkSubmit = cl.loadClass("org.apache.spark.deploy.SparkSubmit"); - } catch (Exception e) { - throw new IOException("Cannot find SparkSubmit; make sure necessary jars are available.", e); + sparkSubmit = cl.loadClass("org.apache.spark.deploy.InProcessSparkSubmit"); + } catch (Exception e1) { + try { + sparkSubmit = cl.loadClass("org.apache.spark.deploy.SparkSubmit"); + } catch (Exception e2) { + throw new IOException("Cannot find SparkSubmit; make sure necessary jars are available.", + e2); + } } Method main;
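A small Scala sketch, not part of the patch, of the lookup order implemented above: prefer the new org.apache.spark.deploy.InProcessSparkSubmit entry point and fall back to org.apache.spark.deploy.SparkSubmit when launcher and Spark versions are mixed. The object name is illustrative, and the narrower ClassNotFoundException catch is an assumption (the patch catches Exception).

    object SubmitClassResolver {
      def findSubmitClass(cl: ClassLoader): Class[_] = {
        try {
          cl.loadClass("org.apache.spark.deploy.InProcessSparkSubmit")
        } catch {
          case _: ClassNotFoundException =>
            cl.loadClass("org.apache.spark.deploy.SparkSubmit")
        }
      }

      def main(args: Array[String]): Unit = {
        // Prints which entry point is visible on the current classpath.
        println(findSubmitClass(Thread.currentThread().getContextClassLoader).getName)
      }
    }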
diff --git a/launcher/src/main/java/org/apache/spark/launcher/SparkSubmitCommandBuilder.java b/launcher/src/main/java/org/apache/spark/launcher/SparkSubmitCommandBuilder.java index e0ef22d7d5058..5cb6457bf5c21 100644 --- a/launcher/src/main/java/org/apache/spark/launcher/SparkSubmitCommandBuilder.java +++ b/launcher/src/main/java/org/apache/spark/launcher/SparkSubmitCommandBuilder.java @@ -88,8 +88,9 @@ class SparkSubmitCommandBuilder extends AbstractCommandBuilder { SparkLauncher.NO_RESOURCE); } - final List<String> sparkArgs; - private final boolean isAppResourceReq; + final List<String> userArgs; + private final List<String> parsedArgs; + private final boolean requiresAppResource; private final boolean isExample; /** @@ -99,17 +100,27 @@ class SparkSubmitCommandBuilder extends AbstractCommandBuilder { */ private boolean allowsMixedArguments; + /** + * This constructor is used when creating a user-configurable launcher. It allows the + * spark-submit argument list to be modified after creation. + */ SparkSubmitCommandBuilder() { - this.sparkArgs = new ArrayList<>(); - this.isAppResourceReq = true; + this.requiresAppResource = true; this.isExample = false; + this.parsedArgs = new ArrayList<>(); + this.userArgs = new ArrayList<>(); } + /** + * This constructor is used when invoking spark-submit; it parses and validates arguments + * provided by the user on the command line. + */ SparkSubmitCommandBuilder(List<String> args) { this.allowsMixedArguments = false; - this.sparkArgs = new ArrayList<>(); + this.parsedArgs = new ArrayList<>(); boolean isExample = false; List<String> submitArgs = args; + this.userArgs = Collections.emptyList(); if (args.size() > 0) { switch (args.get(0)) { @@ -131,21 +142,21 @@ class SparkSubmitCommandBuilder extends AbstractCommandBuilder { } this.isExample = isExample; - OptionParser parser = new OptionParser(); + OptionParser parser = new OptionParser(true); parser.parse(submitArgs); - this.isAppResourceReq = parser.isAppResourceReq; - } else { + this.requiresAppResource = parser.requiresAppResource; + } else { this.isExample = isExample; - this.isAppResourceReq = false; + this.requiresAppResource = false; } } @Override public List<String> buildCommand(Map<String, String> env) throws IOException, IllegalArgumentException { - if (PYSPARK_SHELL.equals(appResource) && isAppResourceReq) { + if (PYSPARK_SHELL.equals(appResource) && requiresAppResource) { return buildPySparkShellCommand(env); - } else if (SPARKR_SHELL.equals(appResource) && isAppResourceReq) { + } else if (SPARKR_SHELL.equals(appResource) && requiresAppResource) { return buildSparkRCommand(env); } else { return buildSparkSubmitCommand(env); @@ -154,9 +165,19 @@ public List<String> buildCommand(Map<String, String> env) List<String> buildSparkSubmitArgs() { List<String> args = new ArrayList<>(); - SparkSubmitOptionParser parser = new SparkSubmitOptionParser(); + OptionParser parser = new OptionParser(false); + final boolean requiresAppResource; + + // If the user args array is not empty, we need to parse it to detect exactly what + // the user is trying to run, so that checks below are correct. + if (!userArgs.isEmpty()) { + parser.parse(userArgs); + requiresAppResource = parser.requiresAppResource; + } else { + requiresAppResource = this.requiresAppResource; + } - if (!allowsMixedArguments && isAppResourceReq) { + if (!allowsMixedArguments && requiresAppResource) { checkArgument(appResource != null, "Missing application resource."); } @@ -208,15 +229,16 @@ List<String> buildSparkSubmitArgs() { args.add(join(",", pyFiles)); } - if (isAppResourceReq) { - checkArgument(!isExample || mainClass != null, "Missing example class name."); + if (isExample) { + checkArgument(mainClass != null, "Missing example class name."); } + if (mainClass != null) { args.add(parser.CLASS); args.add(mainClass); } - args.addAll(sparkArgs); + args.addAll(parsedArgs); if (appResource != null) { args.add(appResource); } @@ -399,7 +421,12 @@ private List<String> findExamplesJars() { private class OptionParser extends SparkSubmitOptionParser { - boolean isAppResourceReq = true; + boolean requiresAppResource = true; + private final boolean errorOnUnknownArgs; + + OptionParser(boolean errorOnUnknownArgs) { + this.errorOnUnknownArgs = errorOnUnknownArgs; + } @Override protected boolean handle(String opt, String value) { @@ -443,23 +470,23 @@ protected boolean handle(String opt, String value) { break; case KILL_SUBMISSION: case STATUS: - isAppResourceReq = false; - sparkArgs.add(opt); - sparkArgs.add(value); + requiresAppResource = false; + parsedArgs.add(opt); + parsedArgs.add(value); break; case HELP: case USAGE_ERROR: - isAppResourceReq = false; - sparkArgs.add(opt); + requiresAppResource = false; + parsedArgs.add(opt); break; case VERSION: - isAppResourceReq = false; - sparkArgs.add(opt); + requiresAppResource = false; + parsedArgs.add(opt); break; default: - sparkArgs.add(opt); + parsedArgs.add(opt); if (value != null) { - sparkArgs.add(value); + parsedArgs.add(value); } break; } @@ -483,12
+510,13 @@ protected boolean handleUnknown(String opt) { mainClass = className; appResource = SparkLauncher.NO_RESOURCE; return false; - } else { + } else if (errorOnUnknownArgs) { checkArgument(!opt.startsWith("-"), "Unrecognized option: %s", opt); checkState(appResource == null, "Found unrecognized argument but resource is already set."); appResource = opt; return false; } + return true; } @Override diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/OneVsRest.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/OneVsRest.scala index f04fde2cbbca1..5348d882cfd67 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/classification/OneVsRest.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/classification/OneVsRest.scala @@ -32,7 +32,7 @@ import org.apache.spark.SparkContext import org.apache.spark.annotation.Since import org.apache.spark.ml._ import org.apache.spark.ml.attribute._ -import org.apache.spark.ml.linalg.Vector +import org.apache.spark.ml.linalg.{Vector, Vectors} import org.apache.spark.ml.param.{Param, ParamMap, ParamPair, Params} import org.apache.spark.ml.param.shared.{HasParallelism, HasWeightCol} import org.apache.spark.ml.util._ @@ -55,7 +55,7 @@ private[ml] trait ClassifierTypeTrait { /** * Params for [[OneVsRest]]. */ -private[ml] trait OneVsRestParams extends PredictorParams +private[ml] trait OneVsRestParams extends ClassifierParams with ClassifierTypeTrait with HasWeightCol { /** @@ -138,6 +138,14 @@ final class OneVsRestModel private[ml] ( @Since("1.4.0") val models: Array[_ <: ClassificationModel[_, _]]) extends Model[OneVsRestModel] with OneVsRestParams with MLWritable { + require(models.nonEmpty, "OneVsRestModel requires at least one model for one class") + + @Since("2.4.0") + val numClasses: Int = models.length + + @Since("2.4.0") + val numFeatures: Int = models.head.numFeatures + /** @group setParam */ @Since("2.1.0") def setFeaturesCol(value: String): this.type = set(featuresCol, value) @@ -146,6 +154,10 @@ final class OneVsRestModel private[ml] ( @Since("2.1.0") def setPredictionCol(value: String): this.type = set(predictionCol, value) + /** @group setParam */ + @Since("2.4.0") + def setRawPredictionCol(value: String): this.type = set(rawPredictionCol, value) + @Since("1.4.0") override def transformSchema(schema: StructType): StructType = { validateAndTransformSchema(schema, fitting = false, getClassifier.featuresDataType) @@ -181,6 +193,7 @@ final class OneVsRestModel private[ml] ( val updateUDF = udf { (predictions: Map[Int, Double], prediction: Vector) => predictions + ((index, prediction(1))) } + model.setFeaturesCol($(featuresCol)) val transformedDataset = model.transform(df).select(columns: _*) val updatedDataset = transformedDataset @@ -195,15 +208,34 @@ final class OneVsRestModel private[ml] ( newDataset.unpersist() } - // output the index of the classifier with highest confidence as prediction - val labelUDF = udf { (predictions: Map[Int, Double]) => - predictions.maxBy(_._2)._1.toDouble - } + if (getRawPredictionCol != "") { + val numClass = models.length - // output label and label metadata as prediction - aggregatedDataset - .withColumn($(predictionCol), labelUDF(col(accColName)), labelMetadata) - .drop(accColName) + // output the RawPrediction as vector + val rawPredictionUDF = udf { (predictions: Map[Int, Double]) => + val predArray = Array.fill[Double](numClass)(0.0) + predictions.foreach { case (idx, value) => predArray(idx) = value } + Vectors.dense(predArray) + } + + // output the index of the classifier with 
highest confidence as prediction + val labelUDF = udf { (rawPredictions: Vector) => rawPredictions.argmax.toDouble } + + // output confidence as raw prediction, label and label metadata as prediction + aggregatedDataset + .withColumn(getRawPredictionCol, rawPredictionUDF(col(accColName))) + .withColumn(getPredictionCol, labelUDF(col(getRawPredictionCol)), labelMetadata) + .drop(accColName) + } else { + // output the index of the classifier with highest confidence as prediction + val labelUDF = udf { (predictions: Map[Int, Double]) => + predictions.maxBy(_._2)._1.toDouble + } + // output label and label metadata as prediction + aggregatedDataset + .withColumn(getPredictionCol, labelUDF(col(accColName)), labelMetadata) + .drop(accColName) + } } @Since("1.4.1") @@ -297,6 +329,10 @@ final class OneVsRest @Since("1.4.0") ( @Since("1.5.0") def setPredictionCol(value: String): this.type = set(predictionCol, value) + /** @group setParam */ + @Since("2.4.0") + def setRawPredictionCol(value: String): this.type = set(rawPredictionCol, value) + /** * The implementation of parallel one vs. rest runs the classification for * each class in a separate thread. diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/BucketedRandomProjectionLSH.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/BucketedRandomProjectionLSH.scala index 36a46ca6ff4b7..41eaaf9679914 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/BucketedRandomProjectionLSH.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/BucketedRandomProjectionLSH.scala @@ -73,6 +73,14 @@ class BucketedRandomProjectionLSHModel private[ml]( private[ml] val randUnitVectors: Array[Vector]) extends LSHModel[BucketedRandomProjectionLSHModel] with BucketedRandomProjectionLSHParams { + /** @group setParam */ + @Since("2.4.0") + override def setInputCol(value: String): this.type = super.set(inputCol, value) + + /** @group setParam */ + @Since("2.4.0") + override def setOutputCol(value: String): this.type = super.set(outputCol, value) + @Since("2.1.0") override protected[ml] val hashFunction: Vector => Array[Vector] = { key: Vector => { diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/LSH.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/LSH.scala index 1c9f47a0b201d..a70931f783f45 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/LSH.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/LSH.scala @@ -65,6 +65,12 @@ private[ml] abstract class LSHModel[T <: LSHModel[T]] extends Model[T] with LSHParams with MLWritable { self: T => + /** @group setParam */ + def setInputCol(value: String): this.type = set(inputCol, value) + + /** @group setParam */ + def setOutputCol(value: String): this.type = set(outputCol, value) + /** * The hash function of LSH, mapping an input feature vector to multiple hash vectors. * @return The mapping of LSH function.
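A minimal sketch, not part of the patch, of the new OneVsRestModel output added above: rawPrediction (the default column name) carries the per-class confidences as a vector, and prediction is its argmax. The tiny training set is made up; calling setRawPredictionCol("") on the model restores the old single-column behavior.

    import org.apache.spark.ml.classification.{LogisticRegression, OneVsRest}
    import org.apache.spark.ml.linalg.Vectors
    import org.apache.spark.sql.SparkSession

    object OneVsRestRawPredictionSketch {
      def main(args: Array[String]): Unit = {
        val spark = SparkSession.builder().master("local[2]").appName("ovr-sketch").getOrCreate()
        import spark.implicits._

        val train = Seq(
          (0.0, Vectors.dense(0.0, 1.0)),
          (1.0, Vectors.dense(1.0, 0.0)),
          (2.0, Vectors.dense(1.0, 1.0))
        ).toDF("label", "features")

        val model = new OneVsRest()
          .setClassifier(new LogisticRegression().setMaxIter(10))
          .fit(train)

        // One confidence per class, assembled from the per-classifier outputs.
        model.transform(train).select("rawPrediction", "prediction").show(truncate = false)
        spark.stop()
      }
    }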
diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/MinHashLSH.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/MinHashLSH.scala index 145422a059196..556848e45532d 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/MinHashLSH.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/MinHashLSH.scala @@ -51,6 +51,14 @@ class MinHashLSHModel private[ml]( private[ml] val randCoefficients: Array[(Int, Int)]) extends LSHModel[MinHashLSHModel] { + /** @group setParam */ + @Since("2.4.0") + override def setInputCol(value: String): this.type = super.set(inputCol, value) + + /** @group setParam */ + @Since("2.4.0") + override def setOutputCol(value: String): this.type = super.set(outputCol, value) + @Since("2.1.0") override protected[ml] val hashFunction: Vector => Array[Vector] = { elems: Vector => { diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/VectorAssembler.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/VectorAssembler.scala index 6bf4aa38b1fcb..4061154b39c14 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/VectorAssembler.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/VectorAssembler.scala @@ -71,12 +71,12 @@ class VectorAssembler @Since("1.4.0") (@Since("1.4.0") override val uid: String) */ @Since("2.4.0") override val handleInvalid: Param[String] = new Param[String](this, "handleInvalid", - """Param for how to handle invalid data (NULL values). Options are 'skip' (filter out rows with - |invalid data), 'error' (throw an error), or 'keep' (return relevant number of NaN in the - |output). Column lengths are taken from the size of ML Attribute Group, which can be set using - |`VectorSizeHint` in a pipeline before `VectorAssembler`. Column lengths can also be inferred - |from first rows of the data since it is safe to do so but only in case of 'error' or 'skip'. - |""".stripMargin.replaceAll("\n", " "), + """Param for how to handle invalid data (NULL and NaN values). Options are 'skip' (filter out + |rows with invalid data), 'error' (throw an error), or 'keep' (return relevant number of NaN + |in the output). Column lengths are taken from the size of ML Attribute Group, which can be + |set using `VectorSizeHint` in a pipeline before `VectorAssembler`. Column lengths can also + |be inferred from first rows of the data since it is safe to do so but only in case of 'error' + |or 'skip'.""".stripMargin.replaceAll("\n", " "), ParamValidators.inArray(VectorAssembler.supportedHandleInvalids)) setDefault(handleInvalid, VectorAssembler.ERROR_INVALID) diff --git a/mllib/src/main/scala/org/apache/spark/ml/stat/KolmogorovSmirnovTest.scala b/mllib/src/main/scala/org/apache/spark/ml/stat/KolmogorovSmirnovTest.scala index c62d7463288f7..adf8145726711 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/stat/KolmogorovSmirnovTest.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/stat/KolmogorovSmirnovTest.scala @@ -24,7 +24,7 @@ import org.apache.spark.api.java.function.Function import org.apache.spark.ml.util.SchemaUtils import org.apache.spark.mllib.stat.{Statistics => OldStatistics} import org.apache.spark.rdd.RDD -import org.apache.spark.sql.{DataFrame, Row} +import org.apache.spark.sql.{DataFrame, Dataset, Row} import org.apache.spark.sql.functions.col /** @@ -59,7 +59,7 @@ object KolmogorovSmirnovTest { * distribution of the sample data and the theoretical distribution we can provide a test for the null hypothesis that the sample data comes from that theoretical distribution.
* - * @param dataset a `DataFrame` containing the sample of data to test + * @param dataset A `Dataset` or a `DataFrame` containing the sample of data to test * @param sampleCol Name of sample column in dataset, of any numerical type * @param cdf a `Double => Double` function to calculate the theoretical CDF at a given value * @return DataFrame containing the test result for the input sampled data. @@ -68,10 +68,10 @@ object KolmogorovSmirnovTest { * - `statistic: Double` */ @Since("2.4.0") - def test(dataset: DataFrame, sampleCol: String, cdf: Double => Double): DataFrame = { + def test(dataset: Dataset[_], sampleCol: String, cdf: Double => Double): DataFrame = { val spark = dataset.sparkSession - val rdd = getSampleRDD(dataset, sampleCol) + val rdd = getSampleRDD(dataset.toDF(), sampleCol) val testResult = OldStatistics.kolmogorovSmirnovTest(rdd, cdf) spark.createDataFrame(Seq(KolmogorovSmirnovTestResult( testResult.pValue, testResult.statistic))) @@ -81,10 +81,11 @@ object KolmogorovSmirnovTest { * Java-friendly version of `test(dataset: Dataset[_], sampleCol: String, cdf: Double => Double)` */ @Since("2.4.0") - def test(dataset: DataFrame, sampleCol: String, - cdf: Function[java.lang.Double, java.lang.Double]): DataFrame = { - val f: Double => Double = x => cdf.call(x) - test(dataset, sampleCol, f) + def test( + dataset: Dataset[_], + sampleCol: String, + cdf: Function[java.lang.Double, java.lang.Double]): DataFrame = { + test(dataset, sampleCol, (x: Double) => cdf.call(x).toDouble) } /** @@ -92,10 +93,11 @@ object KolmogorovSmirnovTest { * distribution equality. Currently supports the normal distribution, taking as parameters * the mean and standard deviation. * - * @param dataset a `DataFrame` containing the sample of data to test + * @param dataset A `Dataset` or a `DataFrame` containing the sample of data to test * @param sampleCol Name of sample column in dataset, of any numerical type * @param distName a `String` name for a theoretical distribution; currently only "norm" is supported. - * @param params `Double*` specifying the parameters to be used for the theoretical distribution + * @param params `Double*` specifying the parameters to be used for the theoretical distribution. + * For the "norm" distribution, the parameters are the mean and the standard deviation. + * @return DataFrame containing the test result for the input sampled data. * This DataFrame will contain a single Row with the following fields: * - `pValue: Double` @@ -103,10 +105,13 @@ */ @Since("2.4.0") @varargs - def test(dataset: DataFrame, sampleCol: String, distName: String, params: Double*): DataFrame = { + def test( + dataset: Dataset[_], + sampleCol: String, + distName: String, + params: Double*): DataFrame = { val spark = dataset.sparkSession - val rdd = getSampleRDD(dataset, sampleCol) + val rdd = getSampleRDD(dataset.toDF(), sampleCol) val testResult = OldStatistics.kolmogorovSmirnovTest(rdd, distName, params: _*) spark.createDataFrame(Seq(KolmogorovSmirnovTestResult( testResult.pValue, testResult.statistic)))
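A short usage sketch, not part of the patch, of the reworked Dataset-based API above; the sample values are made up. For "norm", the two trailing parameters are the mean and the standard deviation.

    import org.apache.spark.ml.stat.KolmogorovSmirnovTest
    import org.apache.spark.sql.SparkSession

    object KSTestSketch {
      def main(args: Array[String]): Unit = {
        val spark = SparkSession.builder().master("local[2]").appName("ks-sketch").getOrCreate()
        import spark.implicits._

        val sample = Seq(0.1, -0.5, 0.3, 1.2, -0.7, 0.0).toDF("sample")
        // Test the sample against N(0, 1); the result holds pValue and statistic.
        KolmogorovSmirnovTest.test(sample, "sample", "norm", 0.0, 1.0).show()
        spark.stop()
      }
    }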
diff --git a/mllib/src/main/scala/org/apache/spark/ml/tree/impl/NodeIdCache.scala b/mllib/src/main/scala/org/apache/spark/ml/tree/impl/NodeIdCache.scala index a7c5f489dea86..5b14a63ada4ef 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/tree/impl/NodeIdCache.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/tree/impl/NodeIdCache.scala @@ -95,7 +95,7 @@ private[spark] class NodeIdCache( splits: Array[Array[Split]]): Unit = { if (prevNodeIdsForInstances != null) { // Unpersist the previous one if one exists. - prevNodeIdsForInstances.unpersist() + prevNodeIdsForInstances.unpersist(false) } prevNodeIdsForInstances = nodeIdsForInstances @@ -166,9 +166,13 @@ private[spark] class NodeIdCache( } } } + if (nodeIdsForInstances != null) { + // Unpersist current one if one exists. + nodeIdsForInstances.unpersist(false) + } if (prevNodeIdsForInstances != null) { // Unpersist the previous one if one exists. - prevNodeIdsForInstances.unpersist() + prevNodeIdsForInstances.unpersist(false) } } } diff --git a/mllib/src/main/scala/org/apache/spark/ml/tuning/CrossValidator.scala b/mllib/src/main/scala/org/apache/spark/ml/tuning/CrossValidator.scala index a0b507d2e718c..c2826dcc08634 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/tuning/CrossValidator.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/tuning/CrossValidator.scala @@ -270,6 +270,17 @@ class CrossValidatorModel private[ml] ( this } + // A Python-friendly auxiliary method + private[tuning] def setSubModels(subModels: JList[JList[Model[_]]]) + : CrossValidatorModel = { + _subModels = if (subModels != null) { + Some(subModels.asScala.toArray.map(_.asScala.toArray)) + } else { + None + } + this + } + /** * @return submodels represented in a two-dimensional array. The index of outer array is the * fold index, and the index of inner array corresponds to the ordering of diff --git a/mllib/src/main/scala/org/apache/spark/ml/tuning/TrainValidationSplit.scala b/mllib/src/main/scala/org/apache/spark/ml/tuning/TrainValidationSplit.scala index 88ff0dfd75e96..8d1b9a8ddab59 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/tuning/TrainValidationSplit.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/tuning/TrainValidationSplit.scala @@ -262,6 +262,17 @@ class TrainValidationSplitModel private[ml] ( this } + // A Python-friendly auxiliary method + private[tuning] def setSubModels(subModels: JList[Model[_]]) + : TrainValidationSplitModel = { + _subModels = if (subModels != null) { + Some(subModels.asScala.toArray) + } else { + None + } + this + } + /** * @return submodels represented in array.
The index of array corresponds to the ordering of * estimatorParamMaps diff --git a/mllib/src/test/java/org/apache/spark/SharedSparkSession.java b/mllib/src/test/java/org/apache/spark/SharedSparkSession.java index 43779878890db..35a250955b282 100644 --- a/mllib/src/test/java/org/apache/spark/SharedSparkSession.java +++ b/mllib/src/test/java/org/apache/spark/SharedSparkSession.java @@ -42,7 +42,12 @@ public void setUp() throws IOException { @After public void tearDown() { - spark.stop(); - spark = null; + try { + spark.stop(); + spark = null; + } finally { + SparkSession.clearDefaultSession(); + SparkSession.clearActiveSession(); + } } } diff --git a/mllib/src/test/scala/org/apache/spark/ml/classification/OneVsRestSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/classification/OneVsRestSuite.scala index 11e88367108b4..2c3417c7e4028 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/classification/OneVsRestSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/classification/OneVsRestSuite.scala @@ -72,11 +72,12 @@ class OneVsRestSuite extends MLTest with DefaultReadWriteTest { .setClassifier(new LogisticRegression) assert(ova.getLabelCol === "label") assert(ova.getPredictionCol === "prediction") + assert(ova.getRawPredictionCol === "rawPrediction") val ovaModel = ova.fit(dataset) MLTestingUtils.checkCopyAndUids(ova, ovaModel) - assert(ovaModel.models.length === numClasses) + assert(ovaModel.numClasses === numClasses) val transformedDataset = ovaModel.transform(dataset) // check for label metadata in prediction col @@ -179,6 +180,7 @@ class OneVsRestSuite extends MLTest with DefaultReadWriteTest { val dataset2 = dataset.select(col("label").as("y"), col("features").as("fea")) ovaModel.setFeaturesCol("fea") ovaModel.setPredictionCol("pred") + ovaModel.setRawPredictionCol("") val transformedDataset = ovaModel.transform(dataset2) val outputFields = transformedDataset.schema.fieldNames.toSet assert(outputFields === Set("y", "fea", "pred")) @@ -190,7 +192,8 @@ class OneVsRestSuite extends MLTest with DefaultReadWriteTest { val ovr = new OneVsRest() .setClassifier(logReg) val output = ovr.fit(dataset).transform(dataset) - assert(output.schema.fieldNames.toSet === Set("label", "features", "prediction")) + assert(output.schema.fieldNames.toSet + === Set("label", "features", "prediction", "rawPrediction")) } test("SPARK-21306: OneVsRest should support setWeightCol") { diff --git a/mllib/src/test/scala/org/apache/spark/ml/feature/BucketedRandomProjectionLSHSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/feature/BucketedRandomProjectionLSHSuite.scala index ed9a39d8d1512..9b823259b1deb 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/feature/BucketedRandomProjectionLSHSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/feature/BucketedRandomProjectionLSHSuite.scala @@ -48,6 +48,14 @@ class BucketedRandomProjectionLSHSuite extends MLTest with DefaultReadWriteTest ParamsSuite.checkParams(model) } + test("setters") { + val model = new BucketedRandomProjectionLSHModel("brp", Array(Vectors.dense(0.0, 1.0))) + .setInputCol("testkeys") + .setOutputCol("testvalues") + assert(model.getInputCol === "testkeys") + assert(model.getOutputCol === "testvalues") + } + test("BucketedRandomProjectionLSH: default params") { val brp = new BucketedRandomProjectionLSH assert(brp.getNumHashTables === 1.0) diff --git a/mllib/src/test/scala/org/apache/spark/ml/feature/IDFSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/feature/IDFSuite.scala index 005edf73d29be..cdd62be43b54c 100644 --- 
a/mllib/src/test/scala/org/apache/spark/ml/feature/IDFSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/feature/IDFSuite.scala @@ -17,17 +17,15 @@ package org.apache.spark.ml.feature -import org.apache.spark.SparkFunSuite import org.apache.spark.ml.linalg.{DenseVector, SparseVector, Vector, Vectors} import org.apache.spark.ml.param.ParamsSuite -import org.apache.spark.ml.util.{DefaultReadWriteTest, MLTestingUtils} +import org.apache.spark.ml.util.{DefaultReadWriteTest, MLTest, MLTestingUtils} import org.apache.spark.ml.util.TestingUtils._ import org.apache.spark.mllib.feature.{IDFModel => OldIDFModel} import org.apache.spark.mllib.linalg.VectorImplicits._ -import org.apache.spark.mllib.util.MLlibTestSparkContext import org.apache.spark.sql.Row -class IDFSuite extends SparkFunSuite with MLlibTestSparkContext with DefaultReadWriteTest { +class IDFSuite extends MLTest with DefaultReadWriteTest { import testImplicits._ @@ -57,7 +55,7 @@ class IDFSuite extends SparkFunSuite with MLlibTestSparkContext with DefaultRead Vectors.dense(0.0, 1.0, 2.0, 3.0), Vectors.sparse(numOfFeatures, Array(1), Array(1.0)) ) - val numOfData = data.size + val numOfData = data.length val idf = Vectors.dense(Array(0, 3, 1, 2).map { x => math.log((numOfData + 1.0) / (x + 1.0)) }) @@ -72,7 +70,7 @@ class IDFSuite extends SparkFunSuite with MLlibTestSparkContext with DefaultRead MLTestingUtils.checkCopyAndUids(idfEst, idfModel) - idfModel.transform(df).select("idfValue", "expected").collect().foreach { + testTransformer[(Vector, Vector)](df, idfModel, "idfValue", "expected") { case Row(x: Vector, y: Vector) => assert(x ~== y absTol 1e-5, "Transformed vector is different with expected vector.") } @@ -85,7 +83,7 @@ class IDFSuite extends SparkFunSuite with MLlibTestSparkContext with DefaultRead Vectors.dense(0.0, 1.0, 2.0, 3.0), Vectors.sparse(numOfFeatures, Array(1), Array(1.0)) ) - val numOfData = data.size + val numOfData = data.length val idf = Vectors.dense(Array(0, 3, 1, 2).map { x => if (x > 0) math.log((numOfData + 1.0) / (x + 1.0)) else 0 }) @@ -99,7 +97,7 @@ class IDFSuite extends SparkFunSuite with MLlibTestSparkContext with DefaultRead .setMinDocFreq(1) .fit(df) - idfModel.transform(df).select("idfValue", "expected").collect().foreach { + testTransformer[(Vector, Vector)](df, idfModel, "idfValue", "expected") { case Row(x: Vector, y: Vector) => assert(x ~== y absTol 1e-5, "Transformed vector is different with expected vector.") } diff --git a/mllib/src/test/scala/org/apache/spark/ml/feature/ImputerSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/feature/ImputerSuite.scala index c08b35b419266..75f63a623e6d8 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/feature/ImputerSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/feature/ImputerSuite.scala @@ -16,13 +16,12 @@ */ package org.apache.spark.ml.feature -import org.apache.spark.{SparkException, SparkFunSuite} -import org.apache.spark.ml.util.DefaultReadWriteTest -import org.apache.spark.mllib.util.MLlibTestSparkContext +import org.apache.spark.SparkException +import org.apache.spark.ml.util.{DefaultReadWriteTest, MLTest} import org.apache.spark.mllib.util.TestingUtils._ import org.apache.spark.sql.{DataFrame, Row} -class ImputerSuite extends SparkFunSuite with MLlibTestSparkContext with DefaultReadWriteTest { +class ImputerSuite extends MLTest with DefaultReadWriteTest { test("Imputer for Double with default missing Value NaN") { val df = spark.createDataFrame( Seq( @@ -76,6 +75,28 @@ class ImputerSuite extends SparkFunSuite with 
MLlibTestSparkContext with Default ImputerSuite.iterateStrategyTest(imputer, df) } + test("Imputer should work with Structured Streaming") { + val localSpark = spark + import localSpark.implicits._ + val df = Seq[(java.lang.Double, Double)]( + (4.0, 4.0), + (10.0, 10.0), + (10.0, 10.0), + (Double.NaN, 8.0), + (null, 8.0) + ).toDF("value", "expected_mean_value") + val imputer = new Imputer() + .setInputCols(Array("value")) + .setOutputCols(Array("out")) + .setStrategy("mean") + val model = imputer.fit(df) + testTransformer[(java.lang.Double, Double)](df, model, "expected_mean_value", "out") { + case Row(exp: java.lang.Double, out: Double) => + assert((exp.isNaN && out.isNaN) || (exp == out), + s"Imputed values differ. Expected: $exp, actual: $out") + } + } + test("Imputer throws exception when surrogate cannot be computed") { val df = spark.createDataFrame( Seq( (0, Double.NaN, 1.0, 1.0), @@ -164,8 +185,6 @@ object ImputerSuite { * @param df DataFrame with columns "id", "value", "expected_mean", "expected_median" */ def iterateStrategyTest(imputer: Imputer, df: DataFrame): Unit = { - val inputCols = imputer.getInputCols - Seq("mean", "median").foreach { strategy => imputer.setStrategy(strategy) val model = imputer.fit(df) diff --git a/mllib/src/test/scala/org/apache/spark/ml/feature/InteractionSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/feature/InteractionSuite.scala index 54f059e5f143e..eea31fc7ae3f2 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/feature/InteractionSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/feature/InteractionSuite.scala @@ -19,15 +19,15 @@ package org.apache.spark.ml.feature import scala.collection.mutable.ArrayBuilder -import org.apache.spark.{SparkException, SparkFunSuite} +import org.apache.spark.SparkException import org.apache.spark.ml.attribute._ import org.apache.spark.ml.linalg.{Vector, Vectors} import org.apache.spark.ml.param.ParamsSuite -import org.apache.spark.ml.util.DefaultReadWriteTest -import org.apache.spark.mllib.util.MLlibTestSparkContext +import org.apache.spark.ml.util.{DefaultReadWriteTest, MLTest} +import org.apache.spark.sql.Row import org.apache.spark.sql.functions.col -class InteractionSuite extends SparkFunSuite with MLlibTestSparkContext with DefaultReadWriteTest { +class InteractionSuite extends MLTest with DefaultReadWriteTest { import testImplicits._ @@ -63,9 +63,9 @@ class InteractionSuite extends SparkFunSuite with MLlibTestSparkContext with Def test("numeric interaction") { val data = Seq( - (2, Vectors.dense(3.0, 4.0)), - (1, Vectors.dense(1.0, 5.0)) - ).toDF("a", "b") + (2, Vectors.dense(3.0, 4.0), Vectors.dense(6.0, 8.0)), + (1, Vectors.dense(1.0, 5.0), Vectors.dense(1.0, 5.0)) + ).toDF("a", "b", "expected") val groupAttr = new AttributeGroup( "b", Array[Attribute]( @@ -73,14 +73,15 @@ class InteractionSuite extends SparkFunSuite with MLlibTestSparkContext with Def NumericAttribute.defaultAttr.withName("bar"))) val df = data.select( col("a").as("a", NumericAttribute.defaultAttr.toMetadata()), - col("b").as("b", groupAttr.toMetadata())) + col("b").as("b", groupAttr.toMetadata()), + col("expected")) val trans = new Interaction().setInputCols(Array("a", "b")).setOutputCol("features") + testTransformer[(Int, Vector, Vector)](df, trans, "features", "expected") { + case Row(features: Vector, expected: Vector) => + assert(features === expected) + } + val res = trans.transform(df) - val expected = Seq( - (2, Vectors.dense(3.0, 4.0), Vectors.dense(6.0, 8.0)), - (1, Vectors.dense(1.0, 5.0), Vectors.dense(1.0, 
5.0)) - ).toDF("a", "b", "features") - assert(res.collect() === expected.collect()) val attrs = AttributeGroup.fromStructField(res.schema("features")) val expectedAttrs = new AttributeGroup( "features", @@ -92,9 +93,9 @@ class InteractionSuite extends SparkFunSuite with MLlibTestSparkContext with Def test("nominal interaction") { val data = Seq( - (2, Vectors.dense(3.0, 4.0)), - (1, Vectors.dense(1.0, 5.0)) - ).toDF("a", "b") + (2, Vectors.dense(3.0, 4.0), Vectors.dense(0, 0, 0, 0, 3, 4)), + (1, Vectors.dense(1.0, 5.0), Vectors.dense(0, 0, 1, 5, 0, 0)) + ).toDF("a", "b", "expected") val groupAttr = new AttributeGroup( "b", Array[Attribute]( @@ -103,14 +104,15 @@ class InteractionSuite extends SparkFunSuite with MLlibTestSparkContext with Def val df = data.select( col("a").as( "a", NominalAttribute.defaultAttr.withValues(Array("up", "down", "left")).toMetadata()), - col("b").as("b", groupAttr.toMetadata())) + col("b").as("b", groupAttr.toMetadata()), + col("expected")) val trans = new Interaction().setInputCols(Array("a", "b")).setOutputCol("features") + testTransformer[(Int, Vector, Vector)](df, trans, "features", "expected") { + case Row(features: Vector, expected: Vector) => + assert(features === expected) + } + val res = trans.transform(df) - val expected = Seq( - (2, Vectors.dense(3.0, 4.0), Vectors.dense(0, 0, 0, 0, 3, 4)), - (1, Vectors.dense(1.0, 5.0), Vectors.dense(0, 0, 1, 5, 0, 0)) - ).toDF("a", "b", "features") - assert(res.collect() === expected.collect()) val attrs = AttributeGroup.fromStructField(res.schema("features")) val expectedAttrs = new AttributeGroup( "features", diff --git a/mllib/src/test/scala/org/apache/spark/ml/feature/MaxAbsScalerSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/feature/MaxAbsScalerSuite.scala index 918da4f9388d4..8dd0f0cb91e37 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/feature/MaxAbsScalerSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/feature/MaxAbsScalerSuite.scala @@ -14,15 +14,14 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ + package org.apache.spark.ml.feature -import org.apache.spark.SparkFunSuite import org.apache.spark.ml.linalg.{Vector, Vectors} -import org.apache.spark.ml.util.{DefaultReadWriteTest, MLTestingUtils} -import org.apache.spark.mllib.util.MLlibTestSparkContext +import org.apache.spark.ml.util.{DefaultReadWriteTest, MLTest, MLTestingUtils} import org.apache.spark.sql.Row -class MaxAbsScalerSuite extends SparkFunSuite with MLlibTestSparkContext with DefaultReadWriteTest { +class MaxAbsScalerSuite extends MLTest with DefaultReadWriteTest { import testImplicits._ @@ -45,9 +44,10 @@ class MaxAbsScalerSuite extends SparkFunSuite with MLlibTestSparkContext with De .setOutputCol("scaled") val model = scaler.fit(df) - model.transform(df).select("expected", "scaled").collect() - .foreach { case Row(vector1: Vector, vector2: Vector) => - assert(vector1.equals(vector2), s"MaxAbsScaler ut error: $vector2 should be $vector1") + testTransformer[(Vector, Vector)](df, model, "expected", "scaled") { + case Row(expectedVec: Vector, actualVec: Vector) => + assert(expectedVec === actualVec, + s"MaxAbsScaler error: Expected $expectedVec but computed $actualVec") } MLTestingUtils.checkCopyAndUids(scaler, model) diff --git a/mllib/src/test/scala/org/apache/spark/ml/feature/MinHashLSHSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/feature/MinHashLSHSuite.scala index 96df68dbdf053..1c2956cb82908 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/feature/MinHashLSHSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/feature/MinHashLSHSuite.scala @@ -17,14 +17,13 @@ package org.apache.spark.ml.feature -import org.apache.spark.SparkFunSuite import org.apache.spark.ml.linalg.{Vector, Vectors} import org.apache.spark.ml.param.ParamsSuite -import org.apache.spark.ml.util.{DefaultReadWriteTest, MLTestingUtils} -import org.apache.spark.mllib.util.MLlibTestSparkContext -import org.apache.spark.sql.Dataset +import org.apache.spark.ml.util.{DefaultReadWriteTest, MLTest, MLTestingUtils} +import org.apache.spark.sql.{Dataset, Row} -class MinHashLSHSuite extends SparkFunSuite with MLlibTestSparkContext with DefaultReadWriteTest { + +class MinHashLSHSuite extends MLTest with DefaultReadWriteTest { @transient var dataset: Dataset[_] = _ @@ -43,6 +42,14 @@ class MinHashLSHSuite extends SparkFunSuite with MLlibTestSparkContext with Defa ParamsSuite.checkParams(model) } + test("setters") { + val model = new MinHashLSHModel("mh", randCoefficients = Array((1, 0))) + .setInputCol("testkeys") + .setOutputCol("testvalues") + assert(model.getInputCol === "testkeys") + assert(model.getOutputCol === "testvalues") + } + test("MinHashLSH: default params") { val rp = new MinHashLSH assert(rp.getNumHashTables === 1.0) @@ -167,4 +174,20 @@ class MinHashLSHSuite extends SparkFunSuite with MLlibTestSparkContext with Defa assert(precision == 1.0) assert(recall >= 0.7) } + + test("MinHashLSHModel.transform should work with Structured Streaming") { + val localSpark = spark + import localSpark.implicits._ + + val model = new MinHashLSHModel("mh", randCoefficients = Array((1, 0))) + model.set(model.inputCol, "keys") + testTransformer[Tuple1[Vector]](dataset.toDF(), model, "keys", model.getOutputCol) { + case Row(_: Vector, output: Seq[_]) => + assert(output.length === model.randCoefficients.length) + // no AND-amplification yet: SPARK-18450, so each hash output is of length 1 + output.foreach { + case hashOutput: Vector => assert(hashOutput.size === 1) + } + } + } } diff --git 
a/mllib/src/test/scala/org/apache/spark/ml/feature/MinMaxScalerSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/feature/MinMaxScalerSuite.scala index 51db74eb739ca..2d965f2ca2c54 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/feature/MinMaxScalerSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/feature/MinMaxScalerSuite.scala @@ -17,13 +17,11 @@ package org.apache.spark.ml.feature -import org.apache.spark.SparkFunSuite import org.apache.spark.ml.linalg.{Vector, Vectors} -import org.apache.spark.ml.util.{DefaultReadWriteTest, MLTestingUtils} -import org.apache.spark.mllib.util.MLlibTestSparkContext +import org.apache.spark.ml.util.{DefaultReadWriteTest, MLTest, MLTestingUtils} import org.apache.spark.sql.Row -class MinMaxScalerSuite extends SparkFunSuite with MLlibTestSparkContext with DefaultReadWriteTest { +class MinMaxScalerSuite extends MLTest with DefaultReadWriteTest { import testImplicits._ @@ -48,9 +46,9 @@ class MinMaxScalerSuite extends SparkFunSuite with MLlibTestSparkContext with De .setMax(5) val model = scaler.fit(df) - model.transform(df).select("expected", "scaled").collect() - .foreach { case Row(vector1: Vector, vector2: Vector) => - assert(vector1.equals(vector2), "Transformed vector is different with expected.") + testTransformer[(Vector, Vector)](df, model, "expected", "scaled") { + case Row(vector1: Vector, vector2: Vector) => + assert(vector1 === vector2, "Transformed vector is different with expected.") } MLTestingUtils.checkCopyAndUids(scaler, model) @@ -114,7 +112,7 @@ class MinMaxScalerSuite extends SparkFunSuite with MLlibTestSparkContext with De val model = scaler.fit(df) model.transform(df).select("expected", "scaled").collect() .foreach { case Row(vector1: Vector, vector2: Vector) => - assert(vector1.equals(vector2), "Transformed vector is different with expected.") + assert(vector1 === vector2, "Transformed vector is different with expected.") } } } diff --git a/mllib/src/test/scala/org/apache/spark/ml/feature/NGramSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/feature/NGramSuite.scala index e5956ee9942aa..201a335e0d7be 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/feature/NGramSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/feature/NGramSuite.scala @@ -84,7 +84,7 @@ class NGramSuite extends MLTest with DefaultReadWriteTest { def testNGram(t: NGram, dataFrame: DataFrame): Unit = { testTransformer[(Seq[String], Seq[String])](dataFrame, t, "nGrams", "wantedNGrams") { - case Row(actualNGrams : Seq[String], wantedNGrams: Seq[String]) => + case Row(actualNGrams : Seq[_], wantedNGrams: Seq[_]) => assert(actualNGrams === wantedNGrams) } } diff --git a/project/MimaExcludes.scala b/project/MimaExcludes.scala index b37b4d51775e8..a87fa68422c34 100644 --- a/project/MimaExcludes.scala +++ b/project/MimaExcludes.scala @@ -36,12 +36,17 @@ object MimaExcludes { // Exclude rules for 2.4.x lazy val v24excludes = v23excludes ++ Seq( + // [SPARK-22941][core] Do not exit JVM when submit fails with in-process launcher. 
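+    // The three excludes below cover SparkSubmit helper methods that were removed by that change,
+    // so MiMa has to be told they are gone on purpose.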
+ ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.deploy.SparkSubmit.printWarning"), + ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.deploy.SparkSubmit.parseSparkConfProperty"), + ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.deploy.SparkSubmit.printVersionAndExit"), + // [SPARK-23412][ML] Add cosine distance measure to BisectingKmeans ProblemFilters.exclude[InheritedNewAbstractMethodProblem]("org.apache.spark.ml.param.shared.HasDistanceMeasure.org$apache$spark$ml$param$shared$HasDistanceMeasure$_setter_$distanceMeasure_="), ProblemFilters.exclude[InheritedNewAbstractMethodProblem]("org.apache.spark.ml.param.shared.HasDistanceMeasure.getDistanceMeasure"), ProblemFilters.exclude[InheritedNewAbstractMethodProblem]("org.apache.spark.ml.param.shared.HasDistanceMeasure.distanceMeasure"), ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.mllib.clustering.BisectingKMeansModel#SaveLoadV1_0.load"), - + // [SPARK-20659] Remove StorageStatus, or make it private ProblemFilters.exclude[ReversedMissingMethodProblem]("org.apache.spark.SparkExecutorInfo.totalOffHeapStorageMemory"), ProblemFilters.exclude[ReversedMissingMethodProblem]("org.apache.spark.SparkExecutorInfo.usedOffHeapStorageMemory"), diff --git a/python/pyspark/ml/classification.py b/python/pyspark/ml/classification.py index fbbe3d0307c81..ec17653a1adf9 100644 --- a/python/pyspark/ml/classification.py +++ b/python/pyspark/ml/classification.py @@ -1543,12 +1543,12 @@ class MultilayerPerceptronClassifier(JavaEstimator, HasFeaturesCol, HasLabelCol, def __init__(self, featuresCol="features", labelCol="label", predictionCol="prediction", maxIter=100, tol=1e-6, seed=None, layers=None, blockSize=128, stepSize=0.03, solver="l-bfgs", initialWeights=None, probabilityCol="probability", - rawPredicitionCol="rawPrediction"): + rawPredictionCol="rawPrediction"): """ __init__(self, featuresCol="features", labelCol="label", predictionCol="prediction", \ maxIter=100, tol=1e-6, seed=None, layers=None, blockSize=128, stepSize=0.03, \ solver="l-bfgs", initialWeights=None, probabilityCol="probability", \ - rawPredicitionCol="rawPrediction") + rawPredictionCol="rawPrediction") """ super(MultilayerPerceptronClassifier, self).__init__() self._java_obj = self._new_java_obj( @@ -1562,12 +1562,12 @@ def __init__(self, featuresCol="features", labelCol="label", predictionCol="pred def setParams(self, featuresCol="features", labelCol="label", predictionCol="prediction", maxIter=100, tol=1e-6, seed=None, layers=None, blockSize=128, stepSize=0.03, solver="l-bfgs", initialWeights=None, probabilityCol="probability", - rawPredicitionCol="rawPrediction"): + rawPredictionCol="rawPrediction"): """ setParams(self, featuresCol="features", labelCol="label", predictionCol="prediction", \ maxIter=100, tol=1e-6, seed=None, layers=None, blockSize=128, stepSize=0.03, \ solver="l-bfgs", initialWeights=None, probabilityCol="probability", \ - rawPredicitionCol="rawPrediction"): + rawPredictionCol="rawPrediction"): Sets params for MultilayerPerceptronClassifier. 
""" kwargs = self._input_kwargs diff --git a/python/pyspark/ml/feature.py b/python/pyspark/ml/feature.py index 5a3e0dd655150..cdda30cfab482 100755 --- a/python/pyspark/ml/feature.py +++ b/python/pyspark/ml/feature.py @@ -2701,7 +2701,8 @@ def setParams(self, inputCol=None, outputCol=None): @inherit_doc -class VectorAssembler(JavaTransformer, HasInputCols, HasOutputCol, JavaMLReadable, JavaMLWritable): +class VectorAssembler(JavaTransformer, HasInputCols, HasOutputCol, HasHandleInvalid, JavaMLReadable, + JavaMLWritable): """ A feature transformer that merges multiple columns into a vector column. @@ -2719,25 +2720,56 @@ class VectorAssembler(JavaTransformer, HasInputCols, HasOutputCol, JavaMLReadabl >>> loadedAssembler = VectorAssembler.load(vectorAssemblerPath) >>> loadedAssembler.transform(df).head().freqs == vecAssembler.transform(df).head().freqs True + >>> dfWithNullsAndNaNs = spark.createDataFrame( + ... [(1.0, 2.0, None), (3.0, float("nan"), 4.0), (5.0, 6.0, 7.0)], ["a", "b", "c"]) + >>> vecAssembler2 = VectorAssembler(inputCols=["a", "b", "c"], outputCol="features", + ... handleInvalid="keep") + >>> vecAssembler2.transform(dfWithNullsAndNaNs).show() + +---+---+----+-------------+ + | a| b| c| features| + +---+---+----+-------------+ + |1.0|2.0|null|[1.0,2.0,NaN]| + |3.0|NaN| 4.0|[3.0,NaN,4.0]| + |5.0|6.0| 7.0|[5.0,6.0,7.0]| + +---+---+----+-------------+ + ... + >>> vecAssembler2.setParams(handleInvalid="skip").transform(dfWithNullsAndNaNs).show() + +---+---+---+-------------+ + | a| b| c| features| + +---+---+---+-------------+ + |5.0|6.0|7.0|[5.0,6.0,7.0]| + +---+---+---+-------------+ + ... .. versionadded:: 1.4.0 """ + handleInvalid = Param(Params._dummy(), "handleInvalid", "How to handle invalid data (NULL " + + "and NaN values). Options are 'skip' (filter out rows with invalid " + + "data), 'error' (throw an error), or 'keep' (return relevant number " + + "of NaN in the output). Column lengths are taken from the size of ML " + + "Attribute Group, which can be set using `VectorSizeHint` in a " + + "pipeline before `VectorAssembler`. Column lengths can also be " + + "inferred from first rows of the data since it is safe to do so but " + + "only in case of 'error' or 'skip').", + typeConverter=TypeConverters.toString) + @keyword_only - def __init__(self, inputCols=None, outputCol=None): + def __init__(self, inputCols=None, outputCol=None, handleInvalid="error"): """ - __init__(self, inputCols=None, outputCol=None) + __init__(self, inputCols=None, outputCol=None, handleInvalid="error") """ super(VectorAssembler, self).__init__() self._java_obj = self._new_java_obj("org.apache.spark.ml.feature.VectorAssembler", self.uid) + self._setDefault(handleInvalid="error") kwargs = self._input_kwargs self.setParams(**kwargs) @keyword_only @since("1.4.0") - def setParams(self, inputCols=None, outputCol=None): + def setParams(self, inputCols=None, outputCol=None, handleInvalid="error"): """ - setParams(self, inputCols=None, outputCol=None) + setParams(self, inputCols=None, outputCol=None, handleInvalid="error") Sets params for this VectorAssembler. 
""" kwargs = self._input_kwargs diff --git a/python/pyspark/ml/param/_shared_params_code_gen.py b/python/pyspark/ml/param/_shared_params_code_gen.py index db951d81de1e7..6e9e0a34cdfde 100644 --- a/python/pyspark/ml/param/_shared_params_code_gen.py +++ b/python/pyspark/ml/param/_shared_params_code_gen.py @@ -157,6 +157,11 @@ def get$Name(self): "TypeConverters.toInt"), ("parallelism", "the number of threads to use when running parallel algorithms (>= 1).", "1", "TypeConverters.toInt"), + ("collectSubModels", "Param for whether to collect a list of sub-models trained during " + + "tuning. If set to false, then only the single best sub-model will be available after " + + "fitting. If set to true, then all sub-models will be available. Warning: For large " + + "models, collecting all sub-models can cause OOMs on the Spark driver.", + "False", "TypeConverters.toBoolean"), ("loss", "the loss function to be optimized.", None, "TypeConverters.toString")] code = [] diff --git a/python/pyspark/ml/param/shared.py b/python/pyspark/ml/param/shared.py index 474c38764e5a1..08408ee8fbfcc 100644 --- a/python/pyspark/ml/param/shared.py +++ b/python/pyspark/ml/param/shared.py @@ -655,6 +655,30 @@ def getParallelism(self): return self.getOrDefault(self.parallelism) +class HasCollectSubModels(Params): + """ + Mixin for param collectSubModels: Param for whether to collect a list of sub-models trained during tuning. If set to false, then only the single best sub-model will be available after fitting. If set to true, then all sub-models will be available. Warning: For large models, collecting all sub-models can cause OOMs on the Spark driver. + """ + + collectSubModels = Param(Params._dummy(), "collectSubModels", "Param for whether to collect a list of sub-models trained during tuning. If set to false, then only the single best sub-model will be available after fitting. If set to true, then all sub-models will be available. Warning: For large models, collecting all sub-models can cause OOMs on the Spark driver.", typeConverter=TypeConverters.toBoolean) + + def __init__(self): + super(HasCollectSubModels, self).__init__() + self._setDefault(collectSubModels=False) + + def setCollectSubModels(self, value): + """ + Sets the value of :py:attr:`collectSubModels`. + """ + return self._set(collectSubModels=value) + + def getCollectSubModels(self): + """ + Gets the value of collectSubModels or its default value. + """ + return self.getOrDefault(self.collectSubModels) + + class HasLoss(Params): """ Mixin for param loss: the loss function to be optimized. diff --git a/python/pyspark/ml/stat.py b/python/pyspark/ml/stat.py index 0eeb5e528434a..93d0f4fd9148f 100644 --- a/python/pyspark/ml/stat.py +++ b/python/pyspark/ml/stat.py @@ -32,32 +32,6 @@ class ChiSquareTest(object): The null hypothesis is that the occurrence of the outcomes is statistically independent. - :param dataset: - DataFrame of categorical labels and categorical features. - Real-valued features will be treated as categorical for each distinct value. - :param featuresCol: - Name of features column in dataset, of type `Vector` (`VectorUDT`). - :param labelCol: - Name of label column in dataset, of any numerical type. - :return: - DataFrame containing the test result for every feature against the label. - This DataFrame will contain a single Row with the following fields: - - `pValues: Vector` - - `degreesOfFreedom: Array[Int]` - - `statistics: Vector` - Each of these fields has one value per feature. 
- - >>> from pyspark.ml.linalg import Vectors - >>> from pyspark.ml.stat import ChiSquareTest - >>> dataset = [[0, Vectors.dense([0, 0, 1])], - ... [0, Vectors.dense([1, 0, 1])], - ... [1, Vectors.dense([2, 1, 1])], - ... [1, Vectors.dense([3, 1, 1])]] - >>> dataset = spark.createDataFrame(dataset, ["label", "features"]) - >>> chiSqResult = ChiSquareTest.test(dataset, 'features', 'label') - >>> chiSqResult.select("degreesOfFreedom").collect()[0] - Row(degreesOfFreedom=[3, 1, 0]) - .. versionadded:: 2.2.0 """ @@ -66,6 +40,32 @@ class ChiSquareTest(object): def test(dataset, featuresCol, labelCol): """ Perform a Pearson's independence test using dataset. + + :param dataset: + DataFrame of categorical labels and categorical features. + Real-valued features will be treated as categorical for each distinct value. + :param featuresCol: + Name of features column in dataset, of type `Vector` (`VectorUDT`). + :param labelCol: + Name of label column in dataset, of any numerical type. + :return: + DataFrame containing the test result for every feature against the label. + This DataFrame will contain a single Row with the following fields: + - `pValues: Vector` + - `degreesOfFreedom: Array[Int]` + - `statistics: Vector` + Each of these fields has one value per feature. + + >>> from pyspark.ml.linalg import Vectors + >>> from pyspark.ml.stat import ChiSquareTest + >>> dataset = [[0, Vectors.dense([0, 0, 1])], + ... [0, Vectors.dense([1, 0, 1])], + ... [1, Vectors.dense([2, 1, 1])], + ... [1, Vectors.dense([3, 1, 1])]] + >>> dataset = spark.createDataFrame(dataset, ["label", "features"]) + >>> chiSqResult = ChiSquareTest.test(dataset, 'features', 'label') + >>> chiSqResult.select("degreesOfFreedom").collect()[0] + Row(degreesOfFreedom=[3, 1, 0]) """ sc = SparkContext._active_spark_context javaTestObj = _jvm().org.apache.spark.ml.stat.ChiSquareTest @@ -85,40 +85,6 @@ class Correlation(object): which is fairly costly. Cache the input Dataset before calling corr with `method = 'spearman'` to avoid recomputing the common lineage. - :param dataset: - A dataset or a dataframe. - :param column: - The name of the column of vectors for which the correlation coefficient needs - to be computed. This must be a column of the dataset, and it must contain - Vector objects. - :param method: - String specifying the method to use for computing correlation. - Supported: `pearson` (default), `spearman`. - :return: - A dataframe that contains the correlation matrix of the column of vectors. This - dataframe contains a single row and a single column of name - '$METHODNAME($COLUMN)'. - - >>> from pyspark.ml.linalg import Vectors - >>> from pyspark.ml.stat import Correlation - >>> dataset = [[Vectors.dense([1, 0, 0, -2])], - ... [Vectors.dense([4, 5, 0, 3])], - ... [Vectors.dense([6, 7, 0, 8])], - ... [Vectors.dense([9, 0, 0, 1])]] - >>> dataset = spark.createDataFrame(dataset, ['features']) - >>> pearsonCorr = Correlation.corr(dataset, 'features', 'pearson').collect()[0][0] - >>> print(str(pearsonCorr).replace('nan', 'NaN')) - DenseMatrix([[ 1. , 0.0556..., NaN, 0.4004...], - [ 0.0556..., 1. , NaN, 0.9135...], - [ NaN, NaN, 1. , NaN], - [ 0.4004..., 0.9135..., NaN, 1. ]]) - >>> spearmanCorr = Correlation.corr(dataset, 'features', method='spearman').collect()[0][0] - >>> print(str(spearmanCorr).replace('nan', 'NaN')) - DenseMatrix([[ 1. , 0.1054..., NaN, 0.4 ], - [ 0.1054..., 1. , NaN, 0.9486... ], - [ NaN, NaN, 1. , NaN], - [ 0.4 , 0.9486... , NaN, 1. ]]) - .. 
versionadded:: 2.2.0 """ @@ -127,6 +93,40 @@ class Correlation(object): def corr(dataset, column, method="pearson"): """ Compute the correlation matrix with specified method using dataset. + + :param dataset: + A Dataset or a DataFrame. + :param column: + The name of the column of vectors for which the correlation coefficient needs + to be computed. This must be a column of the dataset, and it must contain + Vector objects. + :param method: + String specifying the method to use for computing correlation. + Supported: `pearson` (default), `spearman`. + :return: + A DataFrame that contains the correlation matrix of the column of vectors. This + DataFrame contains a single row and a single column of name + '$METHODNAME($COLUMN)'. + + >>> from pyspark.ml.linalg import Vectors + >>> from pyspark.ml.stat import Correlation + >>> dataset = [[Vectors.dense([1, 0, 0, -2])], + ... [Vectors.dense([4, 5, 0, 3])], + ... [Vectors.dense([6, 7, 0, 8])], + ... [Vectors.dense([9, 0, 0, 1])]] + >>> dataset = spark.createDataFrame(dataset, ['features']) + >>> pearsonCorr = Correlation.corr(dataset, 'features', 'pearson').collect()[0][0] + >>> print(str(pearsonCorr).replace('nan', 'NaN')) + DenseMatrix([[ 1. , 0.0556..., NaN, 0.4004...], + [ 0.0556..., 1. , NaN, 0.9135...], + [ NaN, NaN, 1. , NaN], + [ 0.4004..., 0.9135..., NaN, 1. ]]) + >>> spearmanCorr = Correlation.corr(dataset, 'features', method='spearman').collect()[0][0] + >>> print(str(spearmanCorr).replace('nan', 'NaN')) + DenseMatrix([[ 1. , 0.1054..., NaN, 0.4 ], + [ 0.1054..., 1. , NaN, 0.9486... ], + [ NaN, NaN, 1. , NaN], + [ 0.4 , 0.9486... , NaN, 1. ]]) """ sc = SparkContext._active_spark_context javaCorrObj = _jvm().org.apache.spark.ml.stat.Correlation @@ -134,6 +134,67 @@ def corr(dataset, column, method="pearson"): return _java2py(sc, javaCorrObj.corr(*args)) + +class KolmogorovSmirnovTest(object): + """ + .. note:: Experimental + + Conduct the two-sided Kolmogorov-Smirnov (KS) test for data sampled from a continuous + distribution. + + By comparing the largest difference between the empirical cumulative + distribution of the sample data and the theoretical distribution, we can provide a test for + the null hypothesis that the sample data comes from that theoretical distribution. + + .. versionadded:: 2.4.0 + + """ + @staticmethod + @since("2.4.0") + def test(dataset, sampleCol, distName, *params): + """ + Conduct a one-sample, two-sided Kolmogorov-Smirnov test for probability distribution + equality. Currently supports the normal distribution, taking as parameters the mean and + standard deviation. + + :param dataset: + a Dataset or a DataFrame containing the sample of data to test. + :param sampleCol: + Name of sample column in dataset, of any numerical type. + :param distName: + a `string` name for a theoretical distribution; currently only "norm" is supported. + :param params: + a list of `Double` values specifying the parameters to be used for the theoretical + distribution. For the "norm" distribution, the parameters are the mean and standard + deviation. + :return: + A DataFrame that contains the Kolmogorov-Smirnov test result for the input sampled data.
+ This DataFrame will contain a single Row with the following fields: + - `pValue: Double` + - `statistic: Double` + + >>> from pyspark.ml.stat import KolmogorovSmirnovTest + >>> dataset = [[-1.0], [0.0], [1.0]] + >>> dataset = spark.createDataFrame(dataset, ['sample']) + >>> ksResult = KolmogorovSmirnovTest.test(dataset, 'sample', 'norm', 0.0, 1.0).first() + >>> round(ksResult.pValue, 3) + 1.0 + >>> round(ksResult.statistic, 3) + 0.175 + >>> dataset = [[2.0], [3.0], [4.0]] + >>> dataset = spark.createDataFrame(dataset, ['sample']) + >>> ksResult = KolmogorovSmirnovTest.test(dataset, 'sample', 'norm', 3.0, 1.0).first() + >>> round(ksResult.pValue, 3) + 1.0 + >>> round(ksResult.statistic, 3) + 0.175 + """ + sc = SparkContext._active_spark_context + javaTestObj = _jvm().org.apache.spark.ml.stat.KolmogorovSmirnovTest + dataset = _py2java(sc, dataset) + params = [float(param) for param in params] + return _java2py(sc, javaTestObj.test(dataset, sampleCol, distName, + _jvm().PythonUtils.toSeq(params))) + + if __name__ == "__main__": import doctest import pyspark.ml.stat diff --git a/python/pyspark/ml/tests.py b/python/pyspark/ml/tests.py index 4ce54547eab09..2ec0be60e9fa9 100755 --- a/python/pyspark/ml/tests.py +++ b/python/pyspark/ml/tests.py @@ -1018,6 +1018,50 @@ def test_parallel_evaluation(self): cvParallelModel = cv.fit(dataset) self.assertEqual(cvSerialModel.avgMetrics, cvParallelModel.avgMetrics) + def test_expose_sub_models(self): + temp_path = tempfile.mkdtemp() + dataset = self.spark.createDataFrame( + [(Vectors.dense([0.0]), 0.0), + (Vectors.dense([0.4]), 1.0), + (Vectors.dense([0.5]), 0.0), + (Vectors.dense([0.6]), 1.0), + (Vectors.dense([1.0]), 1.0)] * 10, + ["features", "label"]) + + lr = LogisticRegression() + grid = ParamGridBuilder().addGrid(lr.maxIter, [0, 1]).build() + evaluator = BinaryClassificationEvaluator() + + numFolds = 3 + cv = CrossValidator(estimator=lr, estimatorParamMaps=grid, evaluator=evaluator, + numFolds=numFolds, collectSubModels=True) + + def checkSubModels(subModels): + self.assertEqual(len(subModels), numFolds) + for i in range(numFolds): + self.assertEqual(len(subModels[i]), len(grid)) + + cvModel = cv.fit(dataset) + checkSubModels(cvModel.subModels) + + # Test that the default value of the "persistSubModels" option is "true" + testSubPath = temp_path + "/testCrossValidatorSubModels" + savingPathWithSubModels = testSubPath + "cvModel3" + cvModel.save(savingPathWithSubModels) + cvModel3 = CrossValidatorModel.load(savingPathWithSubModels) + checkSubModels(cvModel3.subModels) + cvModel4 = cvModel3.copy() + checkSubModels(cvModel4.subModels) + + savingPathWithoutSubModels = testSubPath + "cvModel2" + cvModel.write().option("persistSubModels", "false").save(savingPathWithoutSubModels) + cvModel2 = CrossValidatorModel.load(savingPathWithoutSubModels) + self.assertEqual(cvModel2.subModels, None) + + for i in range(numFolds): + for j in range(len(grid)): + self.assertEqual(cvModel.subModels[i][j].uid, cvModel3.subModels[i][j].uid) + def test_save_load_nested_estimator(self): temp_path = tempfile.mkdtemp() dataset = self.spark.createDataFrame( @@ -1186,6 +1230,40 @@ def test_parallel_evaluation(self): tvsParallelModel = tvs.fit(dataset) self.assertEqual(tvsSerialModel.validationMetrics, tvsParallelModel.validationMetrics) + def test_expose_sub_models(self): + temp_path = tempfile.mkdtemp() + dataset = self.spark.createDataFrame( + [(Vectors.dense([0.0]), 0.0), + (Vectors.dense([0.4]), 1.0), + (Vectors.dense([0.5]), 0.0), + (Vectors.dense([0.6]), 1.0), +
(Vectors.dense([1.0]), 1.0)] * 10, + ["features", "label"]) + lr = LogisticRegression() + grid = ParamGridBuilder().addGrid(lr.maxIter, [0, 1]).build() + evaluator = BinaryClassificationEvaluator() + tvs = TrainValidationSplit(estimator=lr, estimatorParamMaps=grid, evaluator=evaluator, + collectSubModels=True) + tvsModel = tvs.fit(dataset) + self.assertEqual(len(tvsModel.subModels), len(grid)) + + # Test that the default value of the "persistSubModels" option is "true" + testSubPath = temp_path + "/testTrainValidationSplitSubModels" + savingPathWithSubModels = testSubPath + "cvModel3" + tvsModel.save(savingPathWithSubModels) + tvsModel3 = TrainValidationSplitModel.load(savingPathWithSubModels) + self.assertEqual(len(tvsModel3.subModels), len(grid)) + tvsModel4 = tvsModel3.copy() + self.assertEqual(len(tvsModel4.subModels), len(grid)) + + savingPathWithoutSubModels = testSubPath + "cvModel2" + tvsModel.write().option("persistSubModels", "false").save(savingPathWithoutSubModels) + tvsModel2 = TrainValidationSplitModel.load(savingPathWithoutSubModels) + self.assertEqual(tvsModel2.subModels, None) + + for i in range(len(grid)): + self.assertEqual(tvsModel.subModels[i].uid, tvsModel3.subModels[i].uid) + def test_save_load_nested_estimator(self): # This tests saving and loading the trained model only. # Save/load for TrainValidationSplit will be added later: SPARK-13786 diff --git a/python/pyspark/ml/tuning.py b/python/pyspark/ml/tuning.py index 545e24ca05aa5..0c8029f293cfe 100644 --- a/python/pyspark/ml/tuning.py +++ b/python/pyspark/ml/tuning.py @@ -24,7 +24,7 @@ from pyspark.ml import Estimator, Model from pyspark.ml.common import _py2java from pyspark.ml.param import Params, Param, TypeConverters -from pyspark.ml.param.shared import HasParallelism, HasSeed +from pyspark.ml.param.shared import HasCollectSubModels, HasParallelism, HasSeed from pyspark.ml.util import * from pyspark.ml.wrapper import JavaParams from pyspark.sql.functions import rand @@ -33,7 +33,7 @@ 'TrainValidationSplitModel'] -def _parallelFitTasks(est, train, eva, validation, epm): +def _parallelFitTasks(est, train, eva, validation, epm, collectSubModel): """ Creates a list of callables which can be called from different threads to fit and evaluate an estimator in parallel. Each callable returns an `(index, metric)` pair. @@ -43,14 +43,15 @@ def _parallelFitTasks(est, train, eva, validation, epm): :param eva: Evaluator, used to compute `metric` :param validation: DataFrame, validation data set, used for evaluation. :param epm: Sequence of ParamMap, params maps to be used during fitting & evaluation. - :return: (int, float), an index into `epm` and the associated metric value. + :param collectSubModel: Whether to collect the fitted sub-model. + :return: (int, float, subModel), an index into `epm`, the associated metric value, and the fitted model (or None when collectSubModel is false).
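+    Note: the returned callables are meant to be drained through a thread pool (as
+    CrossValidator and TrainValidationSplit do in their _fit methods), which is what
+    runs the fits in parallel.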
""" modelIter = est.fitMultiple(train, epm) def singleTask(): index, model = next(modelIter) metric = eva.evaluate(model.transform(validation, epm[index])) - return index, metric + return index, metric, model if collectSubModel else None return [singleTask] * len(epm) @@ -194,7 +195,8 @@ def _to_java_impl(self): return java_estimator, java_epms, java_evaluator -class CrossValidator(Estimator, ValidatorParams, HasParallelism, MLReadable, MLWritable): +class CrossValidator(Estimator, ValidatorParams, HasParallelism, HasCollectSubModels, + MLReadable, MLWritable): """ K-fold cross validation performs model selection by splitting the dataset into a set of @@ -233,10 +235,10 @@ class CrossValidator(Estimator, ValidatorParams, HasParallelism, MLReadable, MLW @keyword_only def __init__(self, estimator=None, estimatorParamMaps=None, evaluator=None, numFolds=3, - seed=None, parallelism=1): + seed=None, parallelism=1, collectSubModels=False): """ __init__(self, estimator=None, estimatorParamMaps=None, evaluator=None, numFolds=3,\ - seed=None, parallelism=1) + seed=None, parallelism=1, collectSubModels=False) """ super(CrossValidator, self).__init__() self._setDefault(numFolds=3, parallelism=1) @@ -246,10 +248,10 @@ def __init__(self, estimator=None, estimatorParamMaps=None, evaluator=None, numF @keyword_only @since("1.4.0") def setParams(self, estimator=None, estimatorParamMaps=None, evaluator=None, numFolds=3, - seed=None, parallelism=1): + seed=None, parallelism=1, collectSubModels=False): """ setParams(self, estimator=None, estimatorParamMaps=None, evaluator=None, numFolds=3,\ - seed=None, parallelism=1): + seed=None, parallelism=1, collectSubModels=False): Sets params for cross validator. """ kwargs = self._input_kwargs @@ -282,6 +284,10 @@ def _fit(self, dataset): metrics = [0.0] * numModels pool = ThreadPool(processes=min(self.getParallelism(), numModels)) + subModels = None + collectSubModelsParam = self.getCollectSubModels() + if collectSubModelsParam: + subModels = [[None for j in range(numModels)] for i in range(nFolds)] for i in range(nFolds): validateLB = i * h @@ -290,9 +296,12 @@ def _fit(self, dataset): validation = df.filter(condition).cache() train = df.filter(~condition).cache() - tasks = _parallelFitTasks(est, train, eva, validation, epm) - for j, metric in pool.imap_unordered(lambda f: f(), tasks): + tasks = _parallelFitTasks(est, train, eva, validation, epm, collectSubModelsParam) + for j, metric, subModel in pool.imap_unordered(lambda f: f(), tasks): metrics[j] += (metric / nFolds) + if collectSubModelsParam: + subModels[i][j] = subModel + validation.unpersist() train.unpersist() @@ -301,7 +310,7 @@ def _fit(self, dataset): else: bestIndex = np.argmin(metrics) bestModel = est.fit(dataset, epm[bestIndex]) - return self._copyValues(CrossValidatorModel(bestModel, metrics)) + return self._copyValues(CrossValidatorModel(bestModel, metrics, subModels)) @since("1.4.0") def copy(self, extra=None): @@ -345,9 +354,11 @@ def _from_java(cls, java_stage): numFolds = java_stage.getNumFolds() seed = java_stage.getSeed() parallelism = java_stage.getParallelism() + collectSubModels = java_stage.getCollectSubModels() # Create a new instance of this stage. 
py_stage = cls(estimator=estimator, estimatorParamMaps=epms, evaluator=evaluator, - numFolds=numFolds, seed=seed, parallelism=parallelism) + numFolds=numFolds, seed=seed, parallelism=parallelism, + collectSubModels=collectSubModels) py_stage._resetUid(java_stage.uid()) return py_stage @@ -367,6 +378,7 @@ def _to_java(self): _java_obj.setSeed(self.getSeed()) _java_obj.setNumFolds(self.getNumFolds()) _java_obj.setParallelism(self.getParallelism()) + _java_obj.setCollectSubModels(self.getCollectSubModels()) return _java_obj @@ -381,13 +393,15 @@ class CrossValidatorModel(Model, ValidatorParams, MLReadable, MLWritable): .. versionadded:: 1.4.0 """ - def __init__(self, bestModel, avgMetrics=[]): + def __init__(self, bestModel, avgMetrics=[], subModels=None): super(CrossValidatorModel, self).__init__() #: best model from cross validation self.bestModel = bestModel #: Average cross-validation metrics for each paramMap in #: CrossValidator.estimatorParamMaps, in the corresponding order. self.avgMetrics = avgMetrics + #: sub model list from cross validation + self.subModels = subModels def _transform(self, dataset): return self.bestModel.transform(dataset) @@ -399,6 +413,7 @@ def copy(self, extra=None): and some extra params. This copies the underlying bestModel, creates a deep copy of the embedded paramMap, and copies the embedded and extra parameters over. + It does not copy the extra Params into the subModels. :param extra: Extra parameters to copy to the new instance :return: Copy of this instance @@ -407,7 +422,8 @@ def copy(self, extra=None): extra = dict() bestModel = self.bestModel.copy(extra) avgMetrics = self.avgMetrics - return CrossValidatorModel(bestModel, avgMetrics) + subModels = self.subModels + return CrossValidatorModel(bestModel, avgMetrics, subModels) @since("2.3.0") def write(self): @@ -426,13 +442,17 @@ def _from_java(cls, java_stage): Given a Java CrossValidatorModel, create and return a Python wrapper of it. Used for ML persistence. """ - bestModel = JavaParams._from_java(java_stage.bestModel()) estimator, epms, evaluator = super(CrossValidatorModel, cls)._from_java_impl(java_stage) py_stage = cls(bestModel=bestModel).setEstimator(estimator) py_stage = py_stage.setEstimatorParamMaps(epms).setEvaluator(evaluator) + if java_stage.hasSubModels(): + py_stage.subModels = [[JavaParams._from_java(sub_model) + for sub_model in fold_sub_models] + for fold_sub_models in java_stage.subModels()] + py_stage._resetUid(java_stage.uid()) return py_stage @@ -454,10 +474,16 @@ def _to_java(self): _java_obj.set("evaluator", evaluator) _java_obj.set("estimator", estimator) _java_obj.set("estimatorParamMaps", epms) + + if self.subModels is not None: + java_sub_models = [[sub_model._to_java() for sub_model in fold_sub_models] + for fold_sub_models in self.subModels] + _java_obj.setSubModels(java_sub_models) return _java_obj -class TrainValidationSplit(Estimator, ValidatorParams, HasParallelism, MLReadable, MLWritable): +class TrainValidationSplit(Estimator, ValidatorParams, HasParallelism, HasCollectSubModels, + MLReadable, MLWritable): """ .. 
note:: Experimental @@ -492,10 +518,10 @@ class TrainValidationSplit(Estimator, ValidatorParams, HasParallelism, MLReadabl @keyword_only def __init__(self, estimator=None, estimatorParamMaps=None, evaluator=None, trainRatio=0.75, - parallelism=1, seed=None): + parallelism=1, collectSubModels=False, seed=None): """ __init__(self, estimator=None, estimatorParamMaps=None, evaluator=None, trainRatio=0.75,\ - parallelism=1, seed=None) + parallelism=1, collectSubModels=False, seed=None) """ super(TrainValidationSplit, self).__init__() self._setDefault(trainRatio=0.75, parallelism=1) @@ -505,10 +531,10 @@ def __init__(self, estimator=None, estimatorParamMaps=None, evaluator=None, trai @since("2.0.0") @keyword_only def setParams(self, estimator=None, estimatorParamMaps=None, evaluator=None, trainRatio=0.75, - parallelism=1, seed=None): + parallelism=1, collectSubModels=False, seed=None): """ setParams(self, estimator=None, estimatorParamMaps=None, evaluator=None, trainRatio=0.75,\ - parallelism=1, seed=None): + parallelism=1, collectSubModels=False, seed=None): Sets params for the train validation split. """ kwargs = self._input_kwargs @@ -541,11 +567,19 @@ def _fit(self, dataset): validation = df.filter(condition).cache() train = df.filter(~condition).cache() - tasks = _parallelFitTasks(est, train, eva, validation, epm) + subModels = None + collectSubModelsParam = self.getCollectSubModels() + if collectSubModelsParam: + subModels = [None for i in range(numModels)] + + tasks = _parallelFitTasks(est, train, eva, validation, epm, collectSubModelsParam) pool = ThreadPool(processes=min(self.getParallelism(), numModels)) metrics = [None] * numModels - for j, metric in pool.imap_unordered(lambda f: f(), tasks): + for j, metric, subModel in pool.imap_unordered(lambda f: f(), tasks): metrics[j] = metric + if collectSubModelsParam: + subModels[j] = subModel + train.unpersist() validation.unpersist() @@ -554,7 +588,7 @@ def _fit(self, dataset): else: bestIndex = np.argmin(metrics) bestModel = est.fit(dataset, epm[bestIndex]) - return self._copyValues(TrainValidationSplitModel(bestModel, metrics)) + return self._copyValues(TrainValidationSplitModel(bestModel, metrics, subModels)) @since("2.0.0") def copy(self, extra=None): @@ -598,9 +632,11 @@ def _from_java(cls, java_stage): trainRatio = java_stage.getTrainRatio() seed = java_stage.getSeed() parallelism = java_stage.getParallelism() + collectSubModels = java_stage.getCollectSubModels() # Create a new instance of this stage. py_stage = cls(estimator=estimator, estimatorParamMaps=epms, evaluator=evaluator, - trainRatio=trainRatio, seed=seed, parallelism=parallelism) + trainRatio=trainRatio, seed=seed, parallelism=parallelism, + collectSubModels=collectSubModels) py_stage._resetUid(java_stage.uid()) return py_stage @@ -620,7 +656,7 @@ def _to_java(self): _java_obj.setTrainRatio(self.getTrainRatio()) _java_obj.setSeed(self.getSeed()) _java_obj.setParallelism(self.getParallelism()) - + _java_obj.setCollectSubModels(self.getCollectSubModels()) return _java_obj @@ -633,12 +669,14 @@ class TrainValidationSplitModel(Model, ValidatorParams, MLReadable, MLWritable): .. 
versionadded:: 2.0.0 """ - def __init__(self, bestModel, validationMetrics=[]): + def __init__(self, bestModel, validationMetrics=[], subModels=None): super(TrainValidationSplitModel, self).__init__() - #: best model from cross validation + #: best model from train validation split self.bestModel = bestModel #: evaluated validation metrics self.validationMetrics = validationMetrics + #: sub models from train validation split + self.subModels = subModels def _transform(self, dataset): return self.bestModel.transform(dataset) @@ -651,6 +689,7 @@ def copy(self, extra=None): creates a deep copy of the embedded paramMap, and copies the embedded and extra parameters over. And, this creates a shallow copy of the validationMetrics. + It does not copy the extra Params into the subModels. :param extra: Extra parameters to copy to the new instance :return: Copy of this instance @@ -659,7 +698,8 @@ def copy(self, extra=None): extra = dict() bestModel = self.bestModel.copy(extra) validationMetrics = list(self.validationMetrics) - return TrainValidationSplitModel(bestModel, validationMetrics) + subModels = self.subModels + return TrainValidationSplitModel(bestModel, validationMetrics, subModels) @since("2.3.0") def write(self): @@ -687,6 +727,10 @@ def _from_java(cls, java_stage): py_stage = cls(bestModel=bestModel).setEstimator(estimator) py_stage = py_stage.setEstimatorParamMaps(epms).setEvaluator(evaluator) + if java_stage.hasSubModels(): + py_stage.subModels = [JavaParams._from_java(sub_model) + for sub_model in java_stage.subModels()] + py_stage._resetUid(java_stage.uid()) return py_stage @@ -708,6 +752,11 @@ def _to_java(self): _java_obj.set("evaluator", evaluator) _java_obj.set("estimator", estimator) _java_obj.set("estimatorParamMaps", epms) + + if self.subModels is not None: + java_sub_models = [sub_model._to_java() for sub_model in self.subModels] + _java_obj.setSubModels(java_sub_models) + return _java_obj diff --git a/python/pyspark/ml/util.py b/python/pyspark/ml/util.py index c3c47bd79459a..a486c6a3fdeb5 100644 --- a/python/pyspark/ml/util.py +++ b/python/pyspark/ml/util.py @@ -169,6 +169,10 @@ def overwrite(self): self._jwrite.overwrite() return self + def option(self, key, value): + self._jwrite.option(key, value) + return self + def context(self, sqlContext): """ Sets the SQL context to use for saving. diff --git a/python/pyspark/sql/functions.py b/python/pyspark/sql/functions.py index 3757afbd033d9..8269483f0f44e 100644 --- a/python/pyspark/sql/functions.py +++ b/python/pyspark/sql/functions.py @@ -2094,6 +2094,36 @@ def size(col): return Column(sc._jvm.functions.size(_to_java_column(col))) +@since(2.4) +def array_min(col): + """ + Collection function: returns the minimum value of the array. + + :param col: name of column or expression + + >>> df = spark.createDataFrame([([2, 1, 3],), ([None, 10, -1],)], ['data']) + >>> df.select(array_min(df.data).alias('min')).collect() + [Row(min=1), Row(min=-1)] + """ + sc = SparkContext._active_spark_context + return Column(sc._jvm.functions.array_min(_to_java_column(col))) + + +@since(2.4) +def array_max(col): + """ + Collection function: returns the maximum value of the array. 
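+    Null elements in the array are ignored, as the second example row below shows.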
+ + :param col: name of column or expression + + >>> df = spark.createDataFrame([([2, 1, 3],), ([None, 10, -1],)], ['data']) + >>> df.select(array_max(df.data).alias('max')).collect() + [Row(max=3), Row(max=10)] + """ + sc = SparkContext._active_spark_context + return Column(sc._jvm.functions.array_max(_to_java_column(col))) + + @since(1.5) def sort_array(col, asc=True): """ @@ -2107,7 +2137,7 @@ def sort_array(col, asc=True): [Row(r=[1, 2, 3]), Row(r=[1]), Row(r=[])] >>> df.select(sort_array(df.data, asc=False).alias('r')).collect() [Row(r=[3, 2, 1]), Row(r=[1]), Row(r=[])] - """ + """ sc = SparkContext._active_spark_context return Column(sc._jvm.functions.sort_array(_to_java_column(col), asc)) diff --git a/python/pyspark/sql/tests.py b/python/pyspark/sql/tests.py index dd04ffb4ed393..4e99c8e3c6b10 100644 --- a/python/pyspark/sql/tests.py +++ b/python/pyspark/sql/tests.py @@ -186,16 +186,12 @@ def __init__(self, key, value): self.value = value -class ReusedSQLTestCase(ReusedPySparkTestCase): - @classmethod - def setUpClass(cls): - ReusedPySparkTestCase.setUpClass() - cls.spark = SparkSession(cls.sc) - - @classmethod - def tearDownClass(cls): - ReusedPySparkTestCase.tearDownClass() - cls.spark.stop() +class SQLTestUtils(object): + """ + This util assumes the class mixing it in has a 'spark' attribute holding a SparkSession. + It is usually used with the 'ReusedSQLTestCase' class, but it can be mixed into any class + that is certain to provide a 'spark' attribute. + """ @contextmanager def sql_conf(self, pairs): @@ -204,6 +200,7 @@ def sql_conf(self, pairs): `value` to the configuration `key` and then restores it back when it exits. """ assert isinstance(pairs, dict), "pairs should be a dictionary." + assert hasattr(self, "spark"), "the class must have a 'spark' attribute holding a SparkSession."
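+        # Record each key's current value (None when unset) so it can be restored on exit.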
keys = pairs.keys() new_values = pairs.values() @@ -219,6 +216,18 @@ def sql_conf(self, pairs): else: self.spark.conf.set(key, old_value) + +class ReusedSQLTestCase(ReusedPySparkTestCase, SQLTestUtils): + @classmethod + def setUpClass(cls): + ReusedPySparkTestCase.setUpClass() + cls.spark = SparkSession(cls.sc) + + @classmethod + def tearDownClass(cls): + ReusedPySparkTestCase.tearDownClass() + cls.spark.stop() + def assertPandasEqual(self, expected, result): msg = ("DataFrames are not equal: " + "\n\nExpected:\n%s\n%s" % (expected, expected.dtypes) + @@ -2991,19 +3000,23 @@ def test_create_dateframe_from_pandas_with_dst(self): os.environ['TZ'] = orig_env_tz time.tzset() - def test_2_4_functions(self): + def test_sort_with_nulls_order(self): from pyspark.sql import functions df = self.spark.createDataFrame( [('Tom', 80), (None, 60), ('Alice', 50)], ["name", "height"]) - df.select(df.name).orderBy(functions.asc_nulls_first('name')).collect() - [Row(name=None), Row(name=u'Alice'), Row(name=u'Tom')] - df.select(df.name).orderBy(functions.asc_nulls_last('name')).collect() - [Row(name=u'Alice'), Row(name=u'Tom'), Row(name=None)] - df.select(df.name).orderBy(functions.desc_nulls_first('name')).collect() - [Row(name=None), Row(name=u'Tom'), Row(name=u'Alice')] - df.select(df.name).orderBy(functions.desc_nulls_last('name')).collect() - [Row(name=u'Tom'), Row(name=u'Alice'), Row(name=None)] + self.assertEquals( + df.select(df.name).orderBy(functions.asc_nulls_first('name')).collect(), + [Row(name=None), Row(name=u'Alice'), Row(name=u'Tom')]) + self.assertEquals( + df.select(df.name).orderBy(functions.asc_nulls_last('name')).collect(), + [Row(name=u'Alice'), Row(name=u'Tom'), Row(name=None)]) + self.assertEquals( + df.select(df.name).orderBy(functions.desc_nulls_first('name')).collect(), + [Row(name=None), Row(name=u'Tom'), Row(name=u'Alice')]) + self.assertEquals( + df.select(df.name).orderBy(functions.desc_nulls_last('name')).collect(), + [Row(name=u'Tom'), Row(name=u'Alice'), Row(name=None)]) class HiveSparkSubmitTests(SparkSubmitTests): @@ -3062,6 +3075,64 @@ def test_sparksession_with_stopped_sparkcontext(self): sc.stop() +class QueryExecutionListenerTests(unittest.TestCase, SQLTestUtils): + # These tests are separate because it uses 'spark.sql.queryExecutionListeners' which is + # static and immutable. This can't be set or unset, for example, via `spark.conf`. + + @classmethod + def setUpClass(cls): + import glob + from pyspark.find_spark_home import _find_spark_home + + SPARK_HOME = _find_spark_home() + filename_pattern = ( + "sql/core/target/scala-*/test-classes/org/apache/spark/sql/" + "TestQueryExecutionListener.class") + if not glob.glob(os.path.join(SPARK_HOME, filename_pattern)): + raise unittest.SkipTest( + "'org.apache.spark.sql.TestQueryExecutionListener' is not " + "available. Will skip the related tests.") + + # Note that 'spark.sql.queryExecutionListeners' is a static immutable configuration. 
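+        # It can therefore only be set here, when the test session is first created.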
+ cls.spark = SparkSession.builder \ + .master("local[4]") \ + .appName(cls.__name__) \ + .config( + "spark.sql.queryExecutionListeners", + "org.apache.spark.sql.TestQueryExecutionListener") \ + .getOrCreate() + + @classmethod + def tearDownClass(cls): + cls.spark.stop() + + def tearDown(self): + self.spark._jvm.OnSuccessCall.clear() + + def test_query_execution_listener_on_collect(self): + self.assertFalse( + self.spark._jvm.OnSuccessCall.isCalled(), + "The callback from the query execution listener should not be called before 'collect'") + self.spark.sql("SELECT * FROM range(1)").collect() + self.assertTrue( + self.spark._jvm.OnSuccessCall.isCalled(), + "The callback from the query execution listener should be called after 'collect'") + + @unittest.skipIf( + not _have_pandas or not _have_pyarrow, + _pandas_requirement_message or _pyarrow_requirement_message) + def test_query_execution_listener_on_collect_with_arrow(self): + with self.sql_conf({"spark.sql.execution.arrow.enabled": True}): + self.assertFalse( + self.spark._jvm.OnSuccessCall.isCalled(), + "The callback from the query execution listener should not be " + "called before 'toPandas'") + self.spark.sql("SELECT * FROM range(1)").toPandas() + self.assertTrue( + self.spark._jvm.OnSuccessCall.isCalled(), + "The callback from the query execution listener should be called after 'toPandas'") + + class SparkSessionTests(PySparkTestCase): # This test is separate because it's closely related with session's start and stop. diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/Config.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/Config.scala index 82f6c714f3555..4086970ffb256 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/Config.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/Config.scala @@ -167,5 +167,5 @@ private[spark] object Config extends Logging { val KUBERNETES_EXECUTOR_ANNOTATION_PREFIX = "spark.kubernetes.executor.annotation." val KUBERNETES_EXECUTOR_SECRETS_PREFIX = "spark.kubernetes.executor.secrets." - val KUBERNETES_DRIVER_ENV_KEY = "spark.kubernetes.driverEnv." + val KUBERNETES_DRIVER_ENV_PREFIX = "spark.kubernetes.driverEnv." } diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/KubernetesConf.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/KubernetesConf.scala new file mode 100644 index 0000000000000..77b634ddfabcc --- /dev/null +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/KubernetesConf.scala @@ -0,0 +1,184 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.spark.deploy.k8s + +import io.fabric8.kubernetes.api.model.{LocalObjectReference, LocalObjectReferenceBuilder, Pod} + +import org.apache.spark.SparkConf +import org.apache.spark.deploy.k8s.Config._ +import org.apache.spark.deploy.k8s.Constants._ +import org.apache.spark.deploy.k8s.submit.{JavaMainAppResource, MainAppResource} +import org.apache.spark.internal.config.ConfigEntry + +private[spark] sealed trait KubernetesRoleSpecificConf + +/* + * Structure containing metadata for Kubernetes logic that builds a Spark driver. + */ +private[spark] case class KubernetesDriverSpecificConf( + mainAppResource: Option[MainAppResource], + mainClass: String, + appName: String, + appArgs: Seq[String]) extends KubernetesRoleSpecificConf + +/* + * Structure containing metadata for Kubernetes logic that builds a Spark executor. + */ +private[spark] case class KubernetesExecutorSpecificConf( + executorId: String, + driverPod: Pod) + extends KubernetesRoleSpecificConf + +/** + * Structure containing metadata for Kubernetes logic to build Spark pods. + */ +private[spark] case class KubernetesConf[T <: KubernetesRoleSpecificConf]( + sparkConf: SparkConf, + roleSpecificConf: T, + appResourceNamePrefix: String, + appId: String, + roleLabels: Map[String, String], + roleAnnotations: Map[String, String], + roleSecretNamesToMountPaths: Map[String, String], + roleEnvs: Map[String, String]) { + + def namespace(): String = sparkConf.get(KUBERNETES_NAMESPACE) + + def sparkJars(): Seq[String] = sparkConf + .getOption("spark.jars") + .map(str => str.split(",").toSeq) + .getOrElse(Seq.empty[String]) + + def sparkFiles(): Seq[String] = sparkConf + .getOption("spark.files") + .map(str => str.split(",").toSeq) + .getOrElse(Seq.empty[String]) + + def imagePullPolicy(): String = sparkConf.get(CONTAINER_IMAGE_PULL_POLICY) + + def imagePullSecrets(): Seq[LocalObjectReference] = { + sparkConf + .get(IMAGE_PULL_SECRETS) + .map(_.split(",")) + .getOrElse(Array.empty[String]) + .map(_.trim) + .map { secret => + new LocalObjectReferenceBuilder().withName(secret).build() + } + } + + def nodeSelector(): Map[String, String] = + KubernetesUtils.parsePrefixedKeyValuePairs(sparkConf, KUBERNETES_NODE_SELECTOR_PREFIX) + + def get[T](config: ConfigEntry[T]): T = sparkConf.get(config) + + def get(conf: String): String = sparkConf.get(conf) + + def get(conf: String, defaultValue: String): String = sparkConf.get(conf, defaultValue) + + def getOption(key: String): Option[String] = sparkConf.getOption(key) +} + +private[spark] object KubernetesConf { + def createDriverConf( + sparkConf: SparkConf, + appName: String, + appResourceNamePrefix: String, + appId: String, + mainAppResource: Option[MainAppResource], + mainClass: String, + appArgs: Array[String]): KubernetesConf[KubernetesDriverSpecificConf] = { + val sparkConfWithMainAppJar = sparkConf.clone() + mainAppResource.foreach { + case JavaMainAppResource(res) => + val previousJars = sparkConf + .getOption("spark.jars") + .map(_.split(",")) + .getOrElse(Array.empty) + if (!previousJars.contains(res)) { + sparkConfWithMainAppJar.setJars(previousJars ++ Seq(res)) + } + } + + val driverCustomLabels = KubernetesUtils.parsePrefixedKeyValuePairs( + sparkConf, KUBERNETES_DRIVER_LABEL_PREFIX) + require(!driverCustomLabels.contains(SPARK_APP_ID_LABEL), "Label with key " + + s"$SPARK_APP_ID_LABEL is not allowed as it is reserved for Spark bookkeeping " + + "operations.") + require(!driverCustomLabels.contains(SPARK_ROLE_LABEL), "Label with key " + + s"$SPARK_ROLE_LABEL is not 
allowed as it is reserved for Spark bookkeeping " + + "operations.") + val driverLabels = driverCustomLabels ++ Map( + SPARK_APP_ID_LABEL -> appId, + SPARK_ROLE_LABEL -> SPARK_POD_DRIVER_ROLE) + val driverAnnotations = KubernetesUtils.parsePrefixedKeyValuePairs( + sparkConf, KUBERNETES_DRIVER_ANNOTATION_PREFIX) + val driverSecretNamesToMountPaths = KubernetesUtils.parsePrefixedKeyValuePairs( + sparkConf, KUBERNETES_DRIVER_SECRETS_PREFIX) + val driverEnvs = KubernetesUtils.parsePrefixedKeyValuePairs( + sparkConf, KUBERNETES_DRIVER_ENV_PREFIX) + + KubernetesConf( + sparkConfWithMainAppJar, + KubernetesDriverSpecificConf(mainAppResource, mainClass, appName, appArgs), + appResourceNamePrefix, + appId, + driverLabels, + driverAnnotations, + driverSecretNamesToMountPaths, + driverEnvs) + } + + def createExecutorConf( + sparkConf: SparkConf, + executorId: String, + appId: String, + driverPod: Pod): KubernetesConf[KubernetesExecutorSpecificConf] = { + val executorCustomLabels = KubernetesUtils.parsePrefixedKeyValuePairs( + sparkConf, KUBERNETES_EXECUTOR_LABEL_PREFIX) + require( + !executorCustomLabels.contains(SPARK_APP_ID_LABEL), + s"Custom executor labels cannot contain $SPARK_APP_ID_LABEL as it is reserved for Spark.") + require( + !executorCustomLabels.contains(SPARK_EXECUTOR_ID_LABEL), + s"Custom executor labels cannot contain $SPARK_EXECUTOR_ID_LABEL as it is reserved for" + + " Spark.") + require( + !executorCustomLabels.contains(SPARK_ROLE_LABEL), + s"Custom executor labels cannot contain $SPARK_ROLE_LABEL as it is reserved for Spark.") + val executorLabels = Map( + SPARK_EXECUTOR_ID_LABEL -> executorId, + SPARK_APP_ID_LABEL -> appId, + SPARK_ROLE_LABEL -> SPARK_POD_EXECUTOR_ROLE) ++ + executorCustomLabels + val executorAnnotations = KubernetesUtils.parsePrefixedKeyValuePairs( + sparkConf, KUBERNETES_EXECUTOR_ANNOTATION_PREFIX) + val executorSecrets = KubernetesUtils.parsePrefixedKeyValuePairs( + sparkConf, KUBERNETES_EXECUTOR_SECRETS_PREFIX) + val executorEnv = sparkConf.getExecutorEnv.toMap + + KubernetesConf( + sparkConf.clone(), + KubernetesExecutorSpecificConf(executorId, driverPod), + sparkConf.get(KUBERNETES_EXECUTOR_POD_NAME_PREFIX), + appId, + executorLabels, + executorAnnotations, + executorSecrets, + executorEnv) + } +} diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/KubernetesUtilsTest.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/KubernetesDriverSpec.scala similarity index 57% rename from resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/KubernetesUtilsTest.scala rename to resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/KubernetesDriverSpec.scala index cf41b22e241af..0c5ae022f4070 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/KubernetesUtilsTest.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/KubernetesDriverSpec.scala @@ -16,21 +16,16 @@ */ package org.apache.spark.deploy.k8s -import io.fabric8.kubernetes.api.model.LocalObjectReference +import io.fabric8.kubernetes.api.model.HasMetadata -import org.apache.spark.SparkFunSuite - -class KubernetesUtilsTest extends SparkFunSuite { - - test("testParseImagePullSecrets") { - val noSecrets = KubernetesUtils.parseImagePullSecrets(None) - assert(noSecrets === Nil) - - val oneSecret = KubernetesUtils.parseImagePullSecrets(Some("imagePullSecret")) - assert(oneSecret === new LocalObjectReference("imagePullSecret") :: 
Nil) - - val commaSeparatedSecrets = KubernetesUtils.parseImagePullSecrets(Some("s1, s2 , s3,s4")) - assert(commaSeparatedSecrets.map(_.getName) === "s1" :: "s2" :: "s3" :: "s4" :: Nil) - } +private[spark] case class KubernetesDriverSpec( + pod: SparkPod, + driverKubernetesResources: Seq[HasMetadata], + systemProperties: Map[String, String]) +private[spark] object KubernetesDriverSpec { + def initialSpec(initialProps: Map[String, String]): KubernetesDriverSpec = KubernetesDriverSpec( + SparkPod.initialPod(), + Seq.empty, + initialProps) } diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/KubernetesUtils.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/KubernetesUtils.scala index 5b2bb819cdb14..ee629068ad90d 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/KubernetesUtils.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/KubernetesUtils.scala @@ -37,17 +37,6 @@ private[spark] object KubernetesUtils { sparkConf.getAllWithPrefix(prefix).toMap } - /** - * Parses comma-separated list of imagePullSecrets into K8s-understandable format - */ - def parseImagePullSecrets(imagePullSecrets: Option[String]): List[LocalObjectReference] = { - imagePullSecrets match { - case Some(secretsCommaSeparated) => - secretsCommaSeparated.split(',').map(_.trim).map(new LocalObjectReference(_)).toList - case None => Nil - } - } - def requireNandDefined(opt1: Option[_], opt2: Option[_], errMessage: String): Unit = { opt1.foreach { _ => require(opt2.isEmpty, errMessage) } } diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/MountSecretsBootstrap.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/MountSecretsBootstrap.scala deleted file mode 100644 index c35e7db51d407..0000000000000 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/MountSecretsBootstrap.scala +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.spark.deploy.k8s - -import io.fabric8.kubernetes.api.model.{Container, ContainerBuilder, Pod, PodBuilder} - -/** - * Bootstraps a driver or executor container or an init-container with needed secrets mounted. - */ -private[spark] class MountSecretsBootstrap(secretNamesToMountPaths: Map[String, String]) { - - /** - * Add new secret volumes for the secrets specified in secretNamesToMountPaths into the given pod. - * - * @param pod the pod into which the secret volumes are being added. - * @return the updated pod with the secret volumes added. 
- */ - def addSecretVolumes(pod: Pod): Pod = { - var podBuilder = new PodBuilder(pod) - secretNamesToMountPaths.keys.foreach { name => - podBuilder = podBuilder - .editOrNewSpec() - .addNewVolume() - .withName(secretVolumeName(name)) - .withNewSecret() - .withSecretName(name) - .endSecret() - .endVolume() - .endSpec() - } - - podBuilder.build() - } - - /** - * Mounts Kubernetes secret volumes of the secrets specified in secretNamesToMountPaths into the - * given container. - * - * @param container the container into which the secret volumes are being mounted. - * @return the updated container with the secrets mounted. - */ - def mountSecrets(container: Container): Container = { - var containerBuilder = new ContainerBuilder(container) - secretNamesToMountPaths.foreach { case (name, path) => - containerBuilder = containerBuilder - .addNewVolumeMount() - .withName(secretVolumeName(name)) - .withMountPath(path) - .endVolumeMount() - } - - containerBuilder.build() - } - - private def secretVolumeName(secretName: String): String = { - secretName + "-volume" - } -} diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/submit/steps/DriverConfigurationStep.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/SparkPod.scala similarity index 64% rename from resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/submit/steps/DriverConfigurationStep.scala rename to resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/SparkPod.scala index 17614e040e587..345dd117fd35f 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/submit/steps/DriverConfigurationStep.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/SparkPod.scala @@ -14,17 +14,21 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.apache.spark.deploy.k8s.submit.steps +package org.apache.spark.deploy.k8s -import org.apache.spark.deploy.k8s.submit.KubernetesDriverSpec +import io.fabric8.kubernetes.api.model.{Container, ContainerBuilder, Pod, PodBuilder} -/** - * Represents a step in configuring the Spark driver pod. - */ -private[spark] trait DriverConfigurationStep { +private[spark] case class SparkPod(pod: Pod, container: Container) - /** - * Apply some transformation to the previous state of the driver to add a new feature to it. - */ - def configureDriver(driverSpec: KubernetesDriverSpec): KubernetesDriverSpec +private[spark] object SparkPod { + def initialPod(): SparkPod = { + SparkPod( + new PodBuilder() + .withNewMetadata() + .endMetadata() + .withNewSpec() + .endSpec() + .build(), + new ContainerBuilder().build()) + } } diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/BasicDriverFeatureStep.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/BasicDriverFeatureStep.scala new file mode 100644 index 0000000000000..07bdccbe0479d --- /dev/null +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/BasicDriverFeatureStep.scala @@ -0,0 +1,136 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.deploy.k8s.features + +import scala.collection.JavaConverters._ +import scala.collection.mutable + +import io.fabric8.kubernetes.api.model.{ContainerBuilder, EnvVarBuilder, EnvVarSourceBuilder, HasMetadata, PodBuilder, QuantityBuilder} + +import org.apache.spark.SparkException +import org.apache.spark.deploy.k8s.{KubernetesConf, KubernetesDriverSpecificConf, KubernetesUtils, SparkPod} +import org.apache.spark.deploy.k8s.Config._ +import org.apache.spark.deploy.k8s.Constants._ +import org.apache.spark.internal.config._ +import org.apache.spark.launcher.SparkLauncher + +private[spark] class BasicDriverFeatureStep( + conf: KubernetesConf[KubernetesDriverSpecificConf]) + extends KubernetesFeatureConfigStep { + + private val driverPodName = conf + .get(KUBERNETES_DRIVER_POD_NAME) + .getOrElse(s"${conf.appResourceNamePrefix}-driver") + + private val driverContainerImage = conf + .get(DRIVER_CONTAINER_IMAGE) + .getOrElse(throw new SparkException("Must specify the driver container image")) + + // CPU settings + private val driverCpuCores = conf.get("spark.driver.cores", "1") + private val driverLimitCores = conf.get(KUBERNETES_DRIVER_LIMIT_CORES) + + // Memory settings + private val driverMemoryMiB = conf.get(DRIVER_MEMORY) + private val memoryOverheadMiB = conf + .get(DRIVER_MEMORY_OVERHEAD) + .getOrElse(math.max((MEMORY_OVERHEAD_FACTOR * driverMemoryMiB).toInt, MEMORY_OVERHEAD_MIN_MIB)) + private val driverMemoryWithOverheadMiB = driverMemoryMiB + memoryOverheadMiB + + override def configurePod(pod: SparkPod): SparkPod = { + val driverCustomEnvs = conf.roleEnvs + .toSeq + .map { env => + new EnvVarBuilder() + .withName(env._1) + .withValue(env._2) + .build() + } + + val driverCpuQuantity = new QuantityBuilder(false) + .withAmount(driverCpuCores) + .build() + val driverMemoryQuantity = new QuantityBuilder(false) + .withAmount(s"${driverMemoryWithOverheadMiB}Mi") + .build() + val maybeCpuLimitQuantity = driverLimitCores.map { limitCores => + ("cpu", new QuantityBuilder(false).withAmount(limitCores).build()) + } + + val driverContainer = new ContainerBuilder(pod.container) + .withName(DRIVER_CONTAINER_NAME) + .withImage(driverContainerImage) + .withImagePullPolicy(conf.imagePullPolicy()) + .addAllToEnv(driverCustomEnvs.asJava) + .addNewEnv() + .withName(ENV_DRIVER_BIND_ADDRESS) + .withValueFrom(new EnvVarSourceBuilder() + .withNewFieldRef("v1", "status.podIP") + .build()) + .endEnv() + .withNewResources() + .addToRequests("cpu", driverCpuQuantity) + .addToLimits(maybeCpuLimitQuantity.toMap.asJava) + .addToRequests("memory", driverMemoryQuantity) + .addToLimits("memory", driverMemoryQuantity) + .endResources() + .addToArgs("driver") + .addToArgs("--properties-file", SPARK_CONF_PATH) + .addToArgs("--class", conf.roleSpecificConf.mainClass) + // The user application jar is merged into the spark.jars list and managed through that + // property, so there is no need to reference it explicitly here. 
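Aside: a worked example of the memory request/limit this step computes, assuming the stock defaults MEMORY_OVERHEAD_FACTOR = 0.1 and MEMORY_OVERHEAD_MIN_MIB = 384 (the spark.driver.memory values below are invented):

// overhead = max((MEMORY_OVERHEAD_FACTOR * driverMemoryMiB).toInt, MEMORY_OVERHEAD_MIN_MIB)
// spark.driver.memory=2g -> max((0.1 * 2048).toInt, 384) = 384 -> requests/limits "2432Mi"
// spark.driver.memory=8g -> max((0.1 * 8192).toInt, 384) = 819 -> requests/limits "9011Mi"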
+ .addToArgs(SparkLauncher.NO_RESOURCE) + .addToArgs(conf.roleSpecificConf.appArgs: _*) + .build() + + val driverPod = new PodBuilder(pod.pod) + .editOrNewMetadata() + .withName(driverPodName) + .addToLabels(conf.roleLabels.asJava) + .addToAnnotations(conf.roleAnnotations.asJava) + .endMetadata() + .withNewSpec() + .withRestartPolicy("Never") + .withNodeSelector(conf.nodeSelector().asJava) + .addToImagePullSecrets(conf.imagePullSecrets(): _*) + .endSpec() + .build() + SparkPod(driverPod, driverContainer) + } + + override def getAdditionalPodSystemProperties(): Map[String, String] = { + val additionalProps = mutable.Map( + KUBERNETES_DRIVER_POD_NAME.key -> driverPodName, + "spark.app.id" -> conf.appId, + KUBERNETES_EXECUTOR_POD_NAME_PREFIX.key -> conf.appResourceNamePrefix, + KUBERNETES_DRIVER_SUBMIT_CHECK.key -> "true") + + val resolvedSparkJars = KubernetesUtils.resolveFileUrisAndPath( + conf.sparkJars()) + val resolvedSparkFiles = KubernetesUtils.resolveFileUrisAndPath( + conf.sparkFiles()) + if (resolvedSparkJars.nonEmpty) { + additionalProps.put("spark.jars", resolvedSparkJars.mkString(",")) + } + if (resolvedSparkFiles.nonEmpty) { + additionalProps.put("spark.files", resolvedSparkFiles.mkString(",")) + } + additionalProps.toMap + } + + override def getAdditionalKubernetesResources(): Seq[HasMetadata] = Seq.empty +} diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/BasicExecutorFeatureStep.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/BasicExecutorFeatureStep.scala new file mode 100644 index 0000000000000..d22097587aafe --- /dev/null +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/BasicExecutorFeatureStep.scala @@ -0,0 +1,179 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.spark.deploy.k8s.features + +import scala.collection.JavaConverters._ + +import io.fabric8.kubernetes.api.model.{ContainerBuilder, ContainerPortBuilder, EnvVar, EnvVarBuilder, EnvVarSourceBuilder, HasMetadata, PodBuilder, QuantityBuilder} + +import org.apache.spark.SparkException +import org.apache.spark.deploy.k8s.{KubernetesConf, KubernetesExecutorSpecificConf, SparkPod} +import org.apache.spark.deploy.k8s.Config._ +import org.apache.spark.deploy.k8s.Constants._ +import org.apache.spark.internal.config.{EXECUTOR_CLASS_PATH, EXECUTOR_JAVA_OPTIONS, EXECUTOR_MEMORY, EXECUTOR_MEMORY_OVERHEAD} +import org.apache.spark.rpc.RpcEndpointAddress +import org.apache.spark.scheduler.cluster.CoarseGrainedSchedulerBackend +import org.apache.spark.util.Utils + +private[spark] class BasicExecutorFeatureStep( + kubernetesConf: KubernetesConf[KubernetesExecutorSpecificConf]) + extends KubernetesFeatureConfigStep { + + // Consider moving some of these fields to KubernetesConf or KubernetesExecutorSpecificConf + private val executorExtraClasspath = kubernetesConf.get(EXECUTOR_CLASS_PATH) + private val executorContainerImage = kubernetesConf + .get(EXECUTOR_CONTAINER_IMAGE) + .getOrElse(throw new SparkException("Must specify the executor container image")) + private val blockManagerPort = kubernetesConf + .sparkConf + .getInt("spark.blockmanager.port", DEFAULT_BLOCKMANAGER_PORT) + + private val executorPodNamePrefix = kubernetesConf.appResourceNamePrefix + + private val driverUrl = RpcEndpointAddress( + kubernetesConf.get("spark.driver.host"), + kubernetesConf.sparkConf.getInt("spark.driver.port", DEFAULT_DRIVER_PORT), + CoarseGrainedSchedulerBackend.ENDPOINT_NAME).toString + private val executorMemoryMiB = kubernetesConf.get(EXECUTOR_MEMORY) + private val executorMemoryString = kubernetesConf.get( + EXECUTOR_MEMORY.key, EXECUTOR_MEMORY.defaultValueString) + + private val memoryOverheadMiB = kubernetesConf + .get(EXECUTOR_MEMORY_OVERHEAD) + .getOrElse(math.max((MEMORY_OVERHEAD_FACTOR * executorMemoryMiB).toInt, + MEMORY_OVERHEAD_MIN_MIB)) + private val executorMemoryWithOverhead = executorMemoryMiB + memoryOverheadMiB + + private val executorCores = kubernetesConf.sparkConf.getInt("spark.executor.cores", 1) + private val executorCoresRequest = + if (kubernetesConf.sparkConf.contains(KUBERNETES_EXECUTOR_REQUEST_CORES)) { + kubernetesConf.get(KUBERNETES_EXECUTOR_REQUEST_CORES).get + } else { + executorCores.toString + } + private val executorLimitCores = kubernetesConf.get(KUBERNETES_EXECUTOR_LIMIT_CORES) + + override def configurePod(pod: SparkPod): SparkPod = { + val name = s"$executorPodNamePrefix-exec-${kubernetesConf.roleSpecificConf.executorId}" + + // hostname must be no longer than 63 characters, so take the last 63 characters of the pod + // name as the hostname. 
This preserves uniqueness since the end of name contains + // executorId + val hostname = name.substring(Math.max(0, name.length - 63)) + val executorMemoryQuantity = new QuantityBuilder(false) + .withAmount(s"${executorMemoryWithOverhead}Mi") + .build() + val executorCpuQuantity = new QuantityBuilder(false) + .withAmount(executorCoresRequest) + .build() + val executorExtraClasspathEnv = executorExtraClasspath.map { cp => + new EnvVarBuilder() + .withName(ENV_CLASSPATH) + .withValue(cp) + .build() + } + val executorExtraJavaOptionsEnv = kubernetesConf + .get(EXECUTOR_JAVA_OPTIONS) + .map { opts => + val delimitedOpts = Utils.splitCommandString(opts) + delimitedOpts.zipWithIndex.map { + case (opt, index) => + new EnvVarBuilder().withName(s"$ENV_JAVA_OPT_PREFIX$index").withValue(opt).build() + } + }.getOrElse(Seq.empty[EnvVar]) + val executorEnv = (Seq( + (ENV_DRIVER_URL, driverUrl), + (ENV_EXECUTOR_CORES, executorCores.toString), + (ENV_EXECUTOR_MEMORY, executorMemoryString), + (ENV_APPLICATION_ID, kubernetesConf.appId), + // This is to set the SPARK_CONF_DIR to be /opt/spark/conf + (ENV_SPARK_CONF_DIR, SPARK_CONF_DIR_INTERNAL), + (ENV_EXECUTOR_ID, kubernetesConf.roleSpecificConf.executorId)) ++ + kubernetesConf.roleEnvs) + .map(env => new EnvVarBuilder() + .withName(env._1) + .withValue(env._2) + .build() + ) ++ Seq( + new EnvVarBuilder() + .withName(ENV_EXECUTOR_POD_IP) + .withValueFrom(new EnvVarSourceBuilder() + .withNewFieldRef("v1", "status.podIP") + .build()) + .build() + ) ++ executorExtraJavaOptionsEnv ++ executorExtraClasspathEnv.toSeq + val requiredPorts = Seq( + (BLOCK_MANAGER_PORT_NAME, blockManagerPort)) + .map { case (name, port) => + new ContainerPortBuilder() + .withName(name) + .withContainerPort(port) + .build() + } + + val executorContainer = new ContainerBuilder(pod.container) + .withName("executor") + .withImage(executorContainerImage) + .withImagePullPolicy(kubernetesConf.imagePullPolicy()) + .withNewResources() + .addToRequests("memory", executorMemoryQuantity) + .addToLimits("memory", executorMemoryQuantity) + .addToRequests("cpu", executorCpuQuantity) + .endResources() + .addAllToEnv(executorEnv.asJava) + .withPorts(requiredPorts.asJava) + .addToArgs("executor") + .build() + val containerWithLimitCores = executorLimitCores.map { limitCores => + val executorCpuLimitQuantity = new QuantityBuilder(false) + .withAmount(limitCores) + .build() + new ContainerBuilder(executorContainer) + .editResources() + .addToLimits("cpu", executorCpuLimitQuantity) + .endResources() + .build() + }.getOrElse(executorContainer) + val driverPod = kubernetesConf.roleSpecificConf.driverPod + val executorPod = new PodBuilder(pod.pod) + .editOrNewMetadata() + .withName(name) + .withLabels(kubernetesConf.roleLabels.asJava) + .withAnnotations(kubernetesConf.roleAnnotations.asJava) + .withOwnerReferences() + .addNewOwnerReference() + .withController(true) + .withApiVersion(driverPod.getApiVersion) + .withKind(driverPod.getKind) + .withName(driverPod.getMetadata.getName) + .withUid(driverPod.getMetadata.getUid) + .endOwnerReference() + .endMetadata() + .editOrNewSpec() + .withHostname(hostname) + .withRestartPolicy("Never") + .withNodeSelector(kubernetesConf.nodeSelector().asJava) + .addToImagePullSecrets(kubernetesConf.imagePullSecrets(): _*) + .endSpec() + .build() + SparkPod(executorPod, containerWithLimitCores) + } + + override def getAdditionalPodSystemProperties(): Map[String, String] = Map.empty + + override def getAdditionalKubernetesResources(): Seq[HasMetadata] = Seq.empty +} diff --git 
a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/DriverKubernetesCredentialsFeatureStep.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/DriverKubernetesCredentialsFeatureStep.scala new file mode 100644 index 0000000000000..ff5ad6673b309 --- /dev/null +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/DriverKubernetesCredentialsFeatureStep.scala @@ -0,0 +1,216 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.deploy.k8s.features + +import java.io.File +import java.nio.charset.StandardCharsets + +import scala.collection.JavaConverters._ + +import com.google.common.io.{BaseEncoding, Files} +import io.fabric8.kubernetes.api.model.{ContainerBuilder, HasMetadata, PodBuilder, Secret, SecretBuilder} + +import org.apache.spark.deploy.k8s.{KubernetesConf, SparkPod} +import org.apache.spark.deploy.k8s.Config._ +import org.apache.spark.deploy.k8s.Constants._ + +private[spark] class DriverKubernetesCredentialsFeatureStep(kubernetesConf: KubernetesConf[_]) + extends KubernetesFeatureConfigStep { + // TODO clean up this class, and credentials in general. See also SparkKubernetesClientFactory. + // We should use a struct to hold all creds-related fields. A lot of the code is very repetitive. 
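Aside: a sketch of the two credential channels this step distinguishes (the key names are the k8s Config prefixes and suffixes used below spelled out; file paths and values are placeholders):

val sparkConf = new SparkConf()
  // Submitter-local file: read here, base64-encoded into a driver credentials Secret, mounted.
  .set("spark.kubernetes.authenticate.driver.caCertFile", "/path/to/ca.pem")
  // Pre-mounted file: assumed to already exist inside the driver pod and used as-is.
  .set("spark.kubernetes.authenticate.driver.mounted.oauthTokenFile", "/var/run/secrets/token")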
+ + private val maybeMountedOAuthTokenFile = kubernetesConf.getOption( + s"$KUBERNETES_AUTH_DRIVER_MOUNTED_CONF_PREFIX.$OAUTH_TOKEN_FILE_CONF_SUFFIX") + private val maybeMountedClientKeyFile = kubernetesConf.getOption( + s"$KUBERNETES_AUTH_DRIVER_MOUNTED_CONF_PREFIX.$CLIENT_KEY_FILE_CONF_SUFFIX") + private val maybeMountedClientCertFile = kubernetesConf.getOption( + s"$KUBERNETES_AUTH_DRIVER_MOUNTED_CONF_PREFIX.$CLIENT_CERT_FILE_CONF_SUFFIX") + private val maybeMountedCaCertFile = kubernetesConf.getOption( + s"$KUBERNETES_AUTH_DRIVER_MOUNTED_CONF_PREFIX.$CA_CERT_FILE_CONF_SUFFIX") + private val driverServiceAccount = kubernetesConf.get(KUBERNETES_SERVICE_ACCOUNT_NAME) + + private val oauthTokenBase64 = kubernetesConf + .getOption(s"$KUBERNETES_AUTH_DRIVER_CONF_PREFIX.$OAUTH_TOKEN_CONF_SUFFIX") + .map { token => + BaseEncoding.base64().encode(token.getBytes(StandardCharsets.UTF_8)) + } + + private val caCertDataBase64 = safeFileConfToBase64( + s"$KUBERNETES_AUTH_DRIVER_CONF_PREFIX.$CA_CERT_FILE_CONF_SUFFIX", + "Driver CA cert file") + private val clientKeyDataBase64 = safeFileConfToBase64( + s"$KUBERNETES_AUTH_DRIVER_CONF_PREFIX.$CLIENT_KEY_FILE_CONF_SUFFIX", + "Driver client key file") + private val clientCertDataBase64 = safeFileConfToBase64( + s"$KUBERNETES_AUTH_DRIVER_CONF_PREFIX.$CLIENT_CERT_FILE_CONF_SUFFIX", + "Driver client cert file") + + // TODO decide whether or not to apply this step entirely in the caller, i.e. the builder. + private val shouldMountSecret = oauthTokenBase64.isDefined || + caCertDataBase64.isDefined || + clientKeyDataBase64.isDefined || + clientCertDataBase64.isDefined + + private val driverCredentialsSecretName = + s"${kubernetesConf.appResourceNamePrefix}-kubernetes-credentials" + + override def configurePod(pod: SparkPod): SparkPod = { + if (!shouldMountSecret) { + pod.copy( + pod = driverServiceAccount.map { account => + new PodBuilder(pod.pod) + .editOrNewSpec() + .withServiceAccount(account) + .withServiceAccountName(account) + .endSpec() + .build() + }.getOrElse(pod.pod)) + } else { + val driverPodWithMountedKubernetesCredentials = + new PodBuilder(pod.pod) + .editOrNewSpec() + .addNewVolume() + .withName(DRIVER_CREDENTIALS_SECRET_VOLUME_NAME) + .withNewSecret().withSecretName(driverCredentialsSecretName).endSecret() + .endVolume() + .endSpec() + .build() + + val driverContainerWithMountedSecretVolume = + new ContainerBuilder(pod.container) + .addNewVolumeMount() + .withName(DRIVER_CREDENTIALS_SECRET_VOLUME_NAME) + .withMountPath(DRIVER_CREDENTIALS_SECRETS_BASE_DIR) + .endVolumeMount() + .build() + SparkPod(driverPodWithMountedKubernetesCredentials, driverContainerWithMountedSecretVolume) + } + } + + override def getAdditionalPodSystemProperties(): Map[String, String] = { + val resolvedMountedOAuthTokenFile = resolveSecretLocation( + maybeMountedOAuthTokenFile, + oauthTokenBase64, + DRIVER_CREDENTIALS_OAUTH_TOKEN_PATH) + val resolvedMountedClientKeyFile = resolveSecretLocation( + maybeMountedClientKeyFile, + clientKeyDataBase64, + DRIVER_CREDENTIALS_CLIENT_KEY_PATH) + val resolvedMountedClientCertFile = resolveSecretLocation( + maybeMountedClientCertFile, + clientCertDataBase64, + DRIVER_CREDENTIALS_CLIENT_CERT_PATH) + val resolvedMountedCaCertFile = resolveSecretLocation( + maybeMountedCaCertFile, + caCertDataBase64, + DRIVER_CREDENTIALS_CA_CERT_PATH) + + val redactedTokens = kubernetesConf.sparkConf.getAll + .filter(_._1.endsWith(OAUTH_TOKEN_CONF_SUFFIX)) + .toMap + .mapValues( _ => "<present_but_redacted>") + redactedTokens ++ + resolvedMountedCaCertFile.map { file =>
Map( + s"$KUBERNETES_AUTH_DRIVER_MOUNTED_CONF_PREFIX.$CA_CERT_FILE_CONF_SUFFIX" -> + file) + }.getOrElse(Map.empty) ++ + resolvedMountedClientKeyFile.map { file => + Map( + s"$KUBERNETES_AUTH_DRIVER_MOUNTED_CONF_PREFIX.$CLIENT_KEY_FILE_CONF_SUFFIX" -> + file) + }.getOrElse(Map.empty) ++ + resolvedMountedClientCertFile.map { file => + Map( + s"$KUBERNETES_AUTH_DRIVER_MOUNTED_CONF_PREFIX.$CLIENT_CERT_FILE_CONF_SUFFIX" -> + file) + }.getOrElse(Map.empty) ++ + resolvedMountedOAuthTokenFile.map { file => + Map( + s"$KUBERNETES_AUTH_DRIVER_MOUNTED_CONF_PREFIX.$OAUTH_TOKEN_FILE_CONF_SUFFIX" -> + file) + }.getOrElse(Map.empty) + } + + override def getAdditionalKubernetesResources(): Seq[HasMetadata] = { + if (shouldMountSecret) { + Seq(createCredentialsSecret()) + } else { + Seq.empty + } + } + + private def safeFileConfToBase64(conf: String, fileType: String): Option[String] = { + kubernetesConf.getOption(conf) + .map(new File(_)) + .map { file => + require(file.isFile, String.format("%s provided at %s does not exist or is not a file.", + fileType, file.getAbsolutePath)) + BaseEncoding.base64().encode(Files.toByteArray(file)) + } + } + + /** + * Resolve a Kubernetes secret data entry from an optional client credential used by the + * driver to talk to the Kubernetes API server. + * + * @param userSpecifiedCredential the optional user-specified client credential. + * @param secretName name of the Kubernetes secret storing the client credential. + * @return a secret data entry in the form of a map from the secret name to the secret data, + * which may be empty if the user-specified credential is empty. + */ + private def resolveSecretData( + userSpecifiedCredential: Option[String], + secretName: String): Map[String, String] = { + userSpecifiedCredential.map { valueBase64 => + Map(secretName -> valueBase64) + }.getOrElse(Map.empty[String, String]) + } + + private def resolveSecretLocation( + mountedUserSpecified: Option[String], + valueMountedFromSubmitter: Option[String], + mountedCanonicalLocation: String): Option[String] = { + mountedUserSpecified.orElse(valueMountedFromSubmitter.map { _ => + mountedCanonicalLocation + }) + } + + private def createCredentialsSecret(): Secret = { + val allSecretData = + resolveSecretData( + clientKeyDataBase64, + DRIVER_CREDENTIALS_CLIENT_KEY_SECRET_NAME) ++ + resolveSecretData( + clientCertDataBase64, + DRIVER_CREDENTIALS_CLIENT_CERT_SECRET_NAME) ++ + resolveSecretData( + caCertDataBase64, + DRIVER_CREDENTIALS_CA_CERT_SECRET_NAME) ++ + resolveSecretData( + oauthTokenBase64, + DRIVER_CREDENTIALS_OAUTH_TOKEN_SECRET_NAME) + + new SecretBuilder() + .withNewMetadata() + .withName(driverCredentialsSecretName) + .endMetadata() + .withData(allSecretData.asJava) + .build() + } + +} diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/DriverServiceFeatureStep.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/DriverServiceFeatureStep.scala new file mode 100644 index 0000000000000..f2d7bbd08f305 --- /dev/null +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/DriverServiceFeatureStep.scala @@ -0,0 +1,97 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.deploy.k8s.features + +import scala.collection.JavaConverters._ + +import io.fabric8.kubernetes.api.model.{HasMetadata, ServiceBuilder} + +import org.apache.spark.deploy.k8s.{KubernetesConf, KubernetesDriverSpecificConf, SparkPod} +import org.apache.spark.deploy.k8s.Constants._ +import org.apache.spark.internal.Logging +import org.apache.spark.util.{Clock, SystemClock} + +private[spark] class DriverServiceFeatureStep( + kubernetesConf: KubernetesConf[KubernetesDriverSpecificConf], + clock: Clock = new SystemClock) + extends KubernetesFeatureConfigStep with Logging { + import DriverServiceFeatureStep._ + + require(kubernetesConf.getOption(DRIVER_BIND_ADDRESS_KEY).isEmpty, + s"$DRIVER_BIND_ADDRESS_KEY is not supported in Kubernetes mode, as the driver's bind " + + "address is managed and set to the driver pod's IP address.") + require(kubernetesConf.getOption(DRIVER_HOST_KEY).isEmpty, + s"$DRIVER_HOST_KEY is not supported in Kubernetes mode, as the driver's hostname will be " + + "managed via a Kubernetes service.") + + private val preferredServiceName = s"${kubernetesConf.appResourceNamePrefix}$DRIVER_SVC_POSTFIX" + private val resolvedServiceName = if (preferredServiceName.length <= MAX_SERVICE_NAME_LENGTH) { + preferredServiceName + } else { + val randomServiceId = clock.getTimeMillis() + val shorterServiceName = s"spark-$randomServiceId$DRIVER_SVC_POSTFIX" + logWarning(s"Driver's hostname would preferably be $preferredServiceName, but this is " + + s"too long (must be <= $MAX_SERVICE_NAME_LENGTH characters). 
Falling back to use " + s"$shorterServiceName as the driver service's name.") + shorterServiceName + } + + private val driverPort = kubernetesConf.sparkConf.getInt( + "spark.driver.port", DEFAULT_DRIVER_PORT) + private val driverBlockManagerPort = kubernetesConf.sparkConf.getInt( + org.apache.spark.internal.config.DRIVER_BLOCK_MANAGER_PORT.key, DEFAULT_BLOCKMANAGER_PORT) + + override def configurePod(pod: SparkPod): SparkPod = pod + + override def getAdditionalPodSystemProperties(): Map[String, String] = { + val driverHostname = s"$resolvedServiceName.${kubernetesConf.namespace()}.svc" + Map(DRIVER_HOST_KEY -> driverHostname, + "spark.driver.port" -> driverPort.toString, + org.apache.spark.internal.config.DRIVER_BLOCK_MANAGER_PORT.key -> + driverBlockManagerPort.toString) + } + + override def getAdditionalKubernetesResources(): Seq[HasMetadata] = { + val driverService = new ServiceBuilder() + .withNewMetadata() + .withName(resolvedServiceName) + .endMetadata() + .withNewSpec() + .withClusterIP("None") + .withSelector(kubernetesConf.roleLabels.asJava) + .addNewPort() + .withName(DRIVER_PORT_NAME) + .withPort(driverPort) + .withNewTargetPort(driverPort) + .endPort() + .addNewPort() + .withName(BLOCK_MANAGER_PORT_NAME) + .withPort(driverBlockManagerPort) + .withNewTargetPort(driverBlockManagerPort) + .endPort() + .endSpec() + .build() + Seq(driverService) + } +} + +private[spark] object DriverServiceFeatureStep { + val DRIVER_BIND_ADDRESS_KEY = org.apache.spark.internal.config.DRIVER_BIND_ADDRESS.key + val DRIVER_HOST_KEY = org.apache.spark.internal.config.DRIVER_HOST_ADDRESS.key + val DRIVER_SVC_POSTFIX = "-driver-svc" + val MAX_SERVICE_NAME_LENGTH = 63 +} diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/KubernetesFeatureConfigStep.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/KubernetesFeatureConfigStep.scala new file mode 100644 index 0000000000000..4c1be3bb13293 --- /dev/null +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/KubernetesFeatureConfigStep.scala @@ -0,0 +1,71 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.deploy.k8s.features + +import io.fabric8.kubernetes.api.model.HasMetadata + +import org.apache.spark.deploy.k8s.SparkPod + +/** + * A collection of functions that together represent a "feature" in pods that are launched for + * Spark drivers and executors. + */ +private[spark] trait KubernetesFeatureConfigStep { + + /** + * Apply modifications on the given pod in accordance to this feature. This can include attaching + * volumes, adding environment variables, and adding labels/annotations. + * <p>
+   * Note that we should return a SparkPod that keeps all of the properties of the passed SparkPod
+   * object. So this is correct:
+   * <pre>
+   * {@code val configuredPod = new PodBuilder(pod.pod)
+   *     .editSpec()
+   *     ...
+   *     .build()
+   *   val configuredContainer = new ContainerBuilder(pod.container)
+   *     ...
+   *     .build()
+   *   SparkPod(configuredPod, configuredContainer)
+   *  }
+   * </pre>
+   * This is incorrect:
+   * <pre>
+   * {@code val configuredPod = new PodBuilder() // Loses the original state
+   *     .editSpec()
+   *     ...
+   *     .build()
+   *   val configuredContainer = new ContainerBuilder() // Loses the original state
+   *     ...
+   *     .build()
+   *   SparkPod(configuredPod, configuredContainer)
+   *  }
+   * </pre>
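+   * <p>
+   * For illustration, a hypothetical minimal implementation (a no-op feature step) would be:
+   * <pre>
+   * {@code class NoOpFeatureStep extends KubernetesFeatureConfigStep {
+   *     override def configurePod(pod: SparkPod): SparkPod = pod
+   *     override def getAdditionalPodSystemProperties(): Map[String, String] = Map.empty
+   *     override def getAdditionalKubernetesResources(): Seq[HasMetadata] = Seq.empty
+   *   }
+   * }
+   * </pre>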
+ */ + def configurePod(pod: SparkPod): SparkPod + + /** + * Return any system properties that should be set on the JVM in accordance to this feature. + */ + def getAdditionalPodSystemProperties(): Map[String, String] + + /** + * Return any additional Kubernetes resources that should be added to support this feature. Only + * applicable when creating the driver in cluster mode. + */ + def getAdditionalKubernetesResources(): Seq[HasMetadata] +} diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/MountSecretsFeatureStep.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/MountSecretsFeatureStep.scala new file mode 100644 index 0000000000000..97fa9499b2edb --- /dev/null +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/MountSecretsFeatureStep.scala @@ -0,0 +1,62 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.deploy.k8s.features + +import io.fabric8.kubernetes.api.model.{ContainerBuilder, HasMetadata, PodBuilder, VolumeBuilder, VolumeMountBuilder} + +import org.apache.spark.deploy.k8s.{KubernetesConf, KubernetesRoleSpecificConf, SparkPod} + +private[spark] class MountSecretsFeatureStep( + kubernetesConf: KubernetesConf[_ <: KubernetesRoleSpecificConf]) + extends KubernetesFeatureConfigStep { + override def configurePod(pod: SparkPod): SparkPod = { + val addedVolumes = kubernetesConf + .roleSecretNamesToMountPaths + .keys + .map(secretName => + new VolumeBuilder() + .withName(secretVolumeName(secretName)) + .withNewSecret() + .withSecretName(secretName) + .endSecret() + .build()) + val podWithVolumes = new PodBuilder(pod.pod) + .editOrNewSpec() + .addToVolumes(addedVolumes.toSeq: _*) + .endSpec() + .build() + val addedVolumeMounts = kubernetesConf + .roleSecretNamesToMountPaths + .map { + case (secretName, mountPath) => + new VolumeMountBuilder() + .withName(secretVolumeName(secretName)) + .withMountPath(mountPath) + .build() + } + val containerWithMounts = new ContainerBuilder(pod.container) + .addToVolumeMounts(addedVolumeMounts.toSeq: _*) + .build() + SparkPod(podWithVolumes, containerWithMounts) + } + + override def getAdditionalPodSystemProperties(): Map[String, String] = Map.empty + + override def getAdditionalKubernetesResources(): Seq[HasMetadata] = Seq.empty + + private def secretVolumeName(secretName: String): String = s"$secretName-volume" +} diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/submit/DriverConfigOrchestrator.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/submit/DriverConfigOrchestrator.scala deleted file mode 100644 index b4d3f04a1bc32..0000000000000 --- 
a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/submit/DriverConfigOrchestrator.scala +++ /dev/null @@ -1,145 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.spark.deploy.k8s.submit - -import org.apache.spark.{SparkConf, SparkException} -import org.apache.spark.deploy.k8s.{KubernetesUtils, MountSecretsBootstrap} -import org.apache.spark.deploy.k8s.Config._ -import org.apache.spark.deploy.k8s.Constants._ -import org.apache.spark.deploy.k8s.submit.steps._ -import org.apache.spark.launcher.SparkLauncher -import org.apache.spark.util.SystemClock -import org.apache.spark.util.Utils - -/** - * Figures out and returns the complete ordered list of needed DriverConfigurationSteps to - * configure the Spark driver pod. The returned steps will be applied one by one in the given - * order to produce a final KubernetesDriverSpec that is used in KubernetesClientApplication - * to construct and create the driver pod. - */ -private[spark] class DriverConfigOrchestrator( - kubernetesAppId: String, - kubernetesResourceNamePrefix: String, - mainAppResource: Option[MainAppResource], - appName: String, - mainClass: String, - appArgs: Array[String], - sparkConf: SparkConf) { - - // The resource name prefix is derived from the Spark application name, making it easy to connect - // the names of the Kubernetes resources from e.g. kubectl or the Kubernetes dashboard to the - // application the user submitted. 
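Aside: a concrete example of the prefix derivation described above, which survives in KubernetesClientApplication further below (values invented):

val appName = "SparkPi"
val launchTime = 1523456789123L
s"$appName-$launchTime".toLowerCase.replaceAll("\\.", "-") // "sparkpi-1523456789123"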
- - private val imagePullPolicy = sparkConf.get(CONTAINER_IMAGE_PULL_POLICY) - - def getAllConfigurationSteps: Seq[DriverConfigurationStep] = { - val driverCustomLabels = KubernetesUtils.parsePrefixedKeyValuePairs( - sparkConf, - KUBERNETES_DRIVER_LABEL_PREFIX) - require(!driverCustomLabels.contains(SPARK_APP_ID_LABEL), "Label with key " + - s"$SPARK_APP_ID_LABEL is not allowed as it is reserved for Spark bookkeeping " + - "operations.") - require(!driverCustomLabels.contains(SPARK_ROLE_LABEL), "Label with key " + - s"$SPARK_ROLE_LABEL is not allowed as it is reserved for Spark bookkeeping " + - "operations.") - - val secretNamesToMountPaths = KubernetesUtils.parsePrefixedKeyValuePairs( - sparkConf, - KUBERNETES_DRIVER_SECRETS_PREFIX) - - val allDriverLabels = driverCustomLabels ++ Map( - SPARK_APP_ID_LABEL -> kubernetesAppId, - SPARK_ROLE_LABEL -> SPARK_POD_DRIVER_ROLE) - - val initialSubmissionStep = new BasicDriverConfigurationStep( - kubernetesAppId, - kubernetesResourceNamePrefix, - allDriverLabels, - imagePullPolicy, - appName, - mainClass, - appArgs, - sparkConf) - - val serviceBootstrapStep = new DriverServiceBootstrapStep( - kubernetesResourceNamePrefix, - allDriverLabels, - sparkConf, - new SystemClock) - - val kubernetesCredentialsStep = new DriverKubernetesCredentialsStep( - sparkConf, kubernetesResourceNamePrefix) - - val additionalMainAppJar = if (mainAppResource.nonEmpty) { - val mayBeResource = mainAppResource.get match { - case JavaMainAppResource(resource) if resource != SparkLauncher.NO_RESOURCE => - Some(resource) - case _ => None - } - mayBeResource - } else { - None - } - - val sparkJars = sparkConf.getOption("spark.jars") - .map(_.split(",")) - .getOrElse(Array.empty[String]) ++ - additionalMainAppJar.toSeq - val sparkFiles = sparkConf.getOption("spark.files") - .map(_.split(",")) - .getOrElse(Array.empty[String]) - - // TODO(SPARK-23153): remove once submission client local dependencies are supported. 
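Aside: the rejected-dependency check below keys off URI schemes; a small sketch of the distinction it draws (jar paths are placeholders):

Utils.resolveURI("/tmp/app.jar").getScheme // "file": on the submitter's disk, rejected below
Utils.resolveURI("local:///opt/app.jar").getScheme // "local": already inside the container image
Utils.resolveURI("hdfs://nn:8020/app.jar").getScheme // "hdfs": remote, fetched at run time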
- if (existSubmissionLocalFiles(sparkJars) || existSubmissionLocalFiles(sparkFiles)) { - throw new SparkException("The Kubernetes mode does not yet support referencing application " + - "dependencies in the local file system.") - } - - val dependencyResolutionStep = if (sparkJars.nonEmpty || sparkFiles.nonEmpty) { - Seq(new DependencyResolutionStep( - sparkJars, - sparkFiles)) - } else { - Nil - } - - val mountSecretsStep = if (secretNamesToMountPaths.nonEmpty) { - Seq(new DriverMountSecretsStep(new MountSecretsBootstrap(secretNamesToMountPaths))) - } else { - Nil - } - - Seq( - initialSubmissionStep, - serviceBootstrapStep, - kubernetesCredentialsStep) ++ - dependencyResolutionStep ++ - mountSecretsStep - } - - private def existSubmissionLocalFiles(files: Seq[String]): Boolean = { - files.exists { uri => - Utils.resolveURI(uri).getScheme == "file" - } - } - - private def existNonContainerLocalFiles(files: Seq[String]): Boolean = { - files.exists { uri => - Utils.resolveURI(uri).getScheme != "local" - } - } -} diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/submit/KubernetesClientApplication.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/submit/KubernetesClientApplication.scala index e16d1add600b2..a97f5650fb869 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/submit/KubernetesClientApplication.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/submit/KubernetesClientApplication.scala @@ -27,12 +27,10 @@ import scala.util.control.NonFatal import org.apache.spark.SparkConf import org.apache.spark.deploy.SparkApplication +import org.apache.spark.deploy.k8s.{KubernetesConf, KubernetesDriverSpecificConf, SparkKubernetesClientFactory} import org.apache.spark.deploy.k8s.Config._ import org.apache.spark.deploy.k8s.Constants._ -import org.apache.spark.deploy.k8s.SparkKubernetesClientFactory -import org.apache.spark.deploy.k8s.submit.steps.DriverConfigurationStep import org.apache.spark.internal.Logging -import org.apache.spark.internal.config.ConfigBuilder import org.apache.spark.util.Utils /** @@ -43,9 +41,9 @@ import org.apache.spark.util.Utils * @param driverArgs arguments to the driver */ private[spark] case class ClientArguments( - mainAppResource: Option[MainAppResource], - mainClass: String, - driverArgs: Array[String]) + mainAppResource: Option[MainAppResource], + mainClass: String, + driverArgs: Array[String]) private[spark] object ClientArguments { @@ -80,8 +78,9 @@ private[spark] object ClientArguments { * watcher that monitors and logs the application status. Waits for the application to terminate if * spark.kubernetes.submission.waitAppCompletion is true. * - * @param submissionSteps steps that collectively configure the driver - * @param sparkConf the submission client Spark configuration + * @param builder Responsible for building the base driver pod based on a composition of + * implemented features. 
+ * @param kubernetesConf application configuration * @param kubernetesClient the client to talk to the Kubernetes API server * @param waitForAppCompletion a flag indicating whether the client should wait for the application * to complete @@ -89,31 +88,21 @@ private[spark] object ClientArguments { * @param watcher a watcher that monitors and logs the application status */ private[spark] class Client( - submissionSteps: Seq[DriverConfigurationStep], - sparkConf: SparkConf, + builder: KubernetesDriverBuilder, + kubernetesConf: KubernetesConf[KubernetesDriverSpecificConf], kubernetesClient: KubernetesClient, waitForAppCompletion: Boolean, appName: String, watcher: LoggingPodStatusWatcher, kubernetesResourceNamePrefix: String) extends Logging { - /** - * Run command that initializes a DriverSpec that will be updated after each - * DriverConfigurationStep in the sequence that is passed in. The final KubernetesDriverSpec - * will be used to build the Driver Container, Driver Pod, and Kubernetes Resources - */ def run(): Unit = { - var currentDriverSpec = KubernetesDriverSpec.initialSpec(sparkConf) - // submissionSteps contain steps necessary to take, to resolve varying - // client arguments that are passed in, created by orchestrator - for (nextStep <- submissionSteps) { - currentDriverSpec = nextStep.configureDriver(currentDriverSpec) - } + val resolvedDriverSpec = builder.buildFromFeatures(kubernetesConf) val configMapName = s"$kubernetesResourceNamePrefix-driver-conf-map" - val configMap = buildConfigMap(configMapName, currentDriverSpec.driverSparkConf) + val configMap = buildConfigMap(configMapName, resolvedDriverSpec.systemProperties) // The include of the ENV_VAR for "SPARK_CONF_DIR" is to allow for the // Spark command builder to pickup on the Java Options present in the ConfigMap - val resolvedDriverContainer = new ContainerBuilder(currentDriverSpec.driverContainer) + val resolvedDriverContainer = new ContainerBuilder(resolvedDriverSpec.pod.container) .addNewEnv() .withName(ENV_SPARK_CONF_DIR) .withValue(SPARK_CONF_DIR_INTERNAL) @@ -123,7 +112,7 @@ private[spark] class Client( .withMountPath(SPARK_CONF_DIR_INTERNAL) .endVolumeMount() .build() - val resolvedDriverPod = new PodBuilder(currentDriverSpec.driverPod) + val resolvedDriverPod = new PodBuilder(resolvedDriverSpec.pod.pod) .editSpec() .addToContainers(resolvedDriverContainer) .addNewVolume() @@ -141,12 +130,10 @@ private[spark] class Client( .watch(watcher)) { _ => val createdDriverPod = kubernetesClient.pods().create(resolvedDriverPod) try { - if (currentDriverSpec.otherKubernetesResources.nonEmpty) { - val otherKubernetesResources = - currentDriverSpec.otherKubernetesResources ++ Seq(configMap) - addDriverOwnerReference(createdDriverPod, otherKubernetesResources) - kubernetesClient.resourceList(otherKubernetesResources: _*).createOrReplace() - } + val otherKubernetesResources = + resolvedDriverSpec.driverKubernetesResources ++ Seq(configMap) + addDriverOwnerReference(createdDriverPod, otherKubernetesResources) + kubernetesClient.resourceList(otherKubernetesResources: _*).createOrReplace() } catch { case NonFatal(e) => kubernetesClient.pods().delete(createdDriverPod) @@ -180,20 +167,17 @@ private[spark] class Client( } // Build a Config Map that will house spark conf properties in a single file for spark-submit - private def buildConfigMap(configMapName: String, conf: SparkConf): ConfigMap = { + private def buildConfigMap(configMapName: String, conf: Map[String, String]): ConfigMap = { val properties = new Properties() - 
conf.getAll.foreach { case (k, v) => + conf.foreach { case (k, v) => properties.setProperty(k, v) } val propertiesWriter = new StringWriter() properties.store(propertiesWriter, s"Java properties built from Kubernetes config map with name: $configMapName") - - val namespace = conf.get(KUBERNETES_NAMESPACE) new ConfigMapBuilder() .withNewMetadata() .withName(configMapName) - .withNamespace(namespace) .endMetadata() .addToData(SPARK_CONF_FILE_NAME, propertiesWriter.toString) .build() @@ -211,7 +195,7 @@ private[spark] class KubernetesClientApplication extends SparkApplication { } private def run(clientArguments: ClientArguments, sparkConf: SparkConf): Unit = { - val namespace = sparkConf.get(KUBERNETES_NAMESPACE) + val appName = sparkConf.getOption("spark.app.name").getOrElse("spark") // For constructing the app ID, we can't use the Spark application name, as the app ID is going // to be added as a label to group resources belonging to the same application. Label values are // considerably restrictive, e.g. must be no longer than 63 characters in length. So we generate @@ -219,10 +203,19 @@ private[spark] class KubernetesClientApplication extends SparkApplication { val kubernetesAppId = s"spark-${UUID.randomUUID().toString.replaceAll("-", "")}" val launchTime = System.currentTimeMillis() val waitForAppCompletion = sparkConf.get(WAIT_FOR_APP_COMPLETION) - val appName = sparkConf.getOption("spark.app.name").getOrElse("spark") val kubernetesResourceNamePrefix = { s"$appName-$launchTime".toLowerCase.replaceAll("\\.", "-") } + val kubernetesConf = KubernetesConf.createDriverConf( + sparkConf, + appName, + kubernetesResourceNamePrefix, + kubernetesAppId, + clientArguments.mainAppResource, + clientArguments.mainClass, + clientArguments.driverArgs) + val builder = new KubernetesDriverBuilder + val namespace = kubernetesConf.namespace() // The master URL has been checked for validity already in SparkSubmit. // We just need to get rid of the "k8s://" prefix here. val master = sparkConf.get("spark.master").substring("k8s://".length) @@ -230,15 +223,6 @@ private[spark] class KubernetesClientApplication extends SparkApplication { val watcher = new LoggingPodStatusWatcherImpl(kubernetesAppId, loggingInterval) - val orchestrator = new DriverConfigOrchestrator( - kubernetesAppId, - kubernetesResourceNamePrefix, - clientArguments.mainAppResource, - appName, - clientArguments.mainClass, - clientArguments.driverArgs, - sparkConf) - Utils.tryWithResource(SparkKubernetesClientFactory.createKubernetesClient( master, Some(namespace), @@ -247,8 +231,8 @@ private[spark] class KubernetesClientApplication extends SparkApplication { None, None)) { kubernetesClient => val client = new Client( - orchestrator.getAllConfigurationSteps, - sparkConf, + builder, + kubernetesConf, kubernetesClient, waitForAppCompletion, appName, diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/submit/KubernetesDriverBuilder.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/submit/KubernetesDriverBuilder.scala new file mode 100644 index 0000000000000..c7579ed8cb689 --- /dev/null +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/submit/KubernetesDriverBuilder.scala @@ -0,0 +1,56 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.deploy.k8s.submit + +import org.apache.spark.deploy.k8s.{KubernetesConf, KubernetesDriverSpec, KubernetesDriverSpecificConf, KubernetesRoleSpecificConf} +import org.apache.spark.deploy.k8s.features.{BasicDriverFeatureStep, DriverKubernetesCredentialsFeatureStep, DriverServiceFeatureStep, MountSecretsFeatureStep} + +private[spark] class KubernetesDriverBuilder( + provideBasicStep: (KubernetesConf[KubernetesDriverSpecificConf]) => BasicDriverFeatureStep = + new BasicDriverFeatureStep(_), + provideCredentialsStep: (KubernetesConf[KubernetesDriverSpecificConf]) + => DriverKubernetesCredentialsFeatureStep = + new DriverKubernetesCredentialsFeatureStep(_), + provideServiceStep: (KubernetesConf[KubernetesDriverSpecificConf]) => DriverServiceFeatureStep = + new DriverServiceFeatureStep(_), + provideSecretsStep: (KubernetesConf[_ <: KubernetesRoleSpecificConf] + => MountSecretsFeatureStep) = + new MountSecretsFeatureStep(_)) { + + def buildFromFeatures( + kubernetesConf: KubernetesConf[KubernetesDriverSpecificConf]): KubernetesDriverSpec = { + val baseFeatures = Seq( + provideBasicStep(kubernetesConf), + provideCredentialsStep(kubernetesConf), + provideServiceStep(kubernetesConf)) + val allFeatures = if (kubernetesConf.roleSecretNamesToMountPaths.nonEmpty) { + baseFeatures ++ Seq(provideSecretsStep(kubernetesConf)) + } else baseFeatures + + var spec = KubernetesDriverSpec.initialSpec(kubernetesConf.sparkConf.getAll.toMap) + for (feature <- allFeatures) { + val configuredPod = feature.configurePod(spec.pod) + val addedSystemProperties = feature.getAdditionalPodSystemProperties() + val addedResources = feature.getAdditionalKubernetesResources() + spec = KubernetesDriverSpec( + configuredPod, + spec.driverKubernetesResources ++ addedResources, + spec.systemProperties ++ addedSystemProperties) + } + spec + } +} diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/submit/KubernetesDriverSpec.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/submit/KubernetesDriverSpec.scala deleted file mode 100644 index db13f09387ef9..0000000000000 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/submit/KubernetesDriverSpec.scala +++ /dev/null @@ -1,47 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.spark.deploy.k8s.submit - -import io.fabric8.kubernetes.api.model.{Container, ContainerBuilder, HasMetadata, Pod, PodBuilder} - -import org.apache.spark.SparkConf - -/** - * Represents the components and characteristics of a Spark driver. The driver can be considered - * as being comprised of the driver pod itself, any other Kubernetes resources that the driver - * pod depends on, and the SparkConf that should be supplied to the Spark application. The driver - * container should be operated on via the specific field of this case class as opposed to trying - * to edit the container directly on the pod. The driver container should be attached at the - * end of executing all submission steps. - */ -private[spark] case class KubernetesDriverSpec( - driverPod: Pod, - driverContainer: Container, - otherKubernetesResources: Seq[HasMetadata], - driverSparkConf: SparkConf) - -private[spark] object KubernetesDriverSpec { - def initialSpec(initialSparkConf: SparkConf): KubernetesDriverSpec = { - KubernetesDriverSpec( - // Set new metadata and a new spec so that submission steps can use - // PodBuilder#editMetadata() and/or PodBuilder#editSpec() safely. - new PodBuilder().withNewMetadata().endMetadata().withNewSpec().endSpec().build(), - new ContainerBuilder().build(), - Seq.empty[HasMetadata], - initialSparkConf.clone()) - } -} diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/submit/steps/BasicDriverConfigurationStep.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/submit/steps/BasicDriverConfigurationStep.scala deleted file mode 100644 index fcb1db8008053..0000000000000 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/submit/steps/BasicDriverConfigurationStep.scala +++ /dev/null @@ -1,163 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.spark.deploy.k8s.submit.steps - -import scala.collection.JavaConverters._ - -import io.fabric8.kubernetes.api.model._ - -import org.apache.spark.{SparkConf, SparkException} -import org.apache.spark.deploy.k8s.Config._ -import org.apache.spark.deploy.k8s.Constants._ -import org.apache.spark.deploy.k8s.KubernetesUtils -import org.apache.spark.deploy.k8s.submit.KubernetesDriverSpec -import org.apache.spark.internal.config.{DRIVER_CLASS_PATH, DRIVER_MEMORY, DRIVER_MEMORY_OVERHEAD} -import org.apache.spark.launcher.SparkLauncher - -/** - * Performs basic configuration for the driver pod. - */ -private[spark] class BasicDriverConfigurationStep( - kubernetesAppId: String, - resourceNamePrefix: String, - driverLabels: Map[String, String], - imagePullPolicy: String, - appName: String, - mainClass: String, - appArgs: Array[String], - sparkConf: SparkConf) extends DriverConfigurationStep { - - private val driverPodName = sparkConf - .get(KUBERNETES_DRIVER_POD_NAME) - .getOrElse(s"$resourceNamePrefix-driver") - - private val driverExtraClasspath = sparkConf.get(DRIVER_CLASS_PATH) - - private val driverContainerImage = sparkConf - .get(DRIVER_CONTAINER_IMAGE) - .getOrElse(throw new SparkException("Must specify the driver container image")) - - private val imagePullSecrets = sparkConf.get(IMAGE_PULL_SECRETS) - - // CPU settings - private val driverCpuCores = sparkConf.getOption("spark.driver.cores").getOrElse("1") - private val driverLimitCores = sparkConf.get(KUBERNETES_DRIVER_LIMIT_CORES) - - // Memory settings - private val driverMemoryMiB = sparkConf.get(DRIVER_MEMORY) - private val memoryOverheadMiB = sparkConf - .get(DRIVER_MEMORY_OVERHEAD) - .getOrElse(math.max((MEMORY_OVERHEAD_FACTOR * driverMemoryMiB).toInt, MEMORY_OVERHEAD_MIN_MIB)) - private val driverMemoryWithOverheadMiB = driverMemoryMiB + memoryOverheadMiB - - override def configureDriver(driverSpec: KubernetesDriverSpec): KubernetesDriverSpec = { - val driverExtraClasspathEnv = driverExtraClasspath.map { classPath => - new EnvVarBuilder() - .withName(ENV_CLASSPATH) - .withValue(classPath) - .build() - } - - val driverCustomAnnotations = KubernetesUtils.parsePrefixedKeyValuePairs( - sparkConf, KUBERNETES_DRIVER_ANNOTATION_PREFIX) - require(!driverCustomAnnotations.contains(SPARK_APP_NAME_ANNOTATION), - s"Annotation with key $SPARK_APP_NAME_ANNOTATION is not allowed as it is reserved for" + - " Spark bookkeeping operations.") - - val driverCustomEnvs = sparkConf.getAllWithPrefix(KUBERNETES_DRIVER_ENV_KEY).toSeq - .map { env => - new EnvVarBuilder() - .withName(env._1) - .withValue(env._2) - .build() - } - - val driverAnnotations = driverCustomAnnotations ++ Map(SPARK_APP_NAME_ANNOTATION -> appName) - - val nodeSelector = KubernetesUtils.parsePrefixedKeyValuePairs( - sparkConf, KUBERNETES_NODE_SELECTOR_PREFIX) - - val driverCpuQuantity = new QuantityBuilder(false) - .withAmount(driverCpuCores) - .build() - val driverMemoryQuantity = new QuantityBuilder(false) - .withAmount(s"${driverMemoryWithOverheadMiB}Mi") - .build() - val maybeCpuLimitQuantity = driverLimitCores.map { limitCores => - ("cpu", new QuantityBuilder(false).withAmount(limitCores).build()) - } - - val driverContainerWithoutArgs = new ContainerBuilder(driverSpec.driverContainer) - .withName(DRIVER_CONTAINER_NAME) - .withImage(driverContainerImage) - .withImagePullPolicy(imagePullPolicy) - .addAllToEnv(driverCustomEnvs.asJava) - .addToEnv(driverExtraClasspathEnv.toSeq: _*) - .addNewEnv() - .withName(ENV_DRIVER_BIND_ADDRESS) - .withValueFrom(new 
EnvVarSourceBuilder() - .withNewFieldRef("v1", "status.podIP") - .build()) - .endEnv() - .withNewResources() - .addToRequests("cpu", driverCpuQuantity) - .addToRequests("memory", driverMemoryQuantity) - .addToLimits("memory", driverMemoryQuantity) - .addToLimits(maybeCpuLimitQuantity.toMap.asJava) - .endResources() - .addToArgs("driver") - .addToArgs("--properties-file", SPARK_CONF_PATH) - .addToArgs("--class", mainClass) - // The user application jar is merged into the spark.jars list and managed through that - // property, so there is no need to reference it explicitly here. - .addToArgs(SparkLauncher.NO_RESOURCE) - - val driverContainer = appArgs.toList match { - case "" :: Nil | Nil => driverContainerWithoutArgs.build() - case _ => driverContainerWithoutArgs.addToArgs(appArgs: _*).build() - } - - val parsedImagePullSecrets = KubernetesUtils.parseImagePullSecrets(imagePullSecrets) - - val baseDriverPod = new PodBuilder(driverSpec.driverPod) - .editOrNewMetadata() - .withName(driverPodName) - .addToLabels(driverLabels.asJava) - .addToAnnotations(driverAnnotations.asJava) - .endMetadata() - .withNewSpec() - .withRestartPolicy("Never") - .withNodeSelector(nodeSelector.asJava) - .withImagePullSecrets(parsedImagePullSecrets.asJava) - .endSpec() - .build() - - val resolvedSparkConf = driverSpec.driverSparkConf.clone() - .setIfMissing(KUBERNETES_DRIVER_POD_NAME, driverPodName) - .set("spark.app.id", kubernetesAppId) - .set(KUBERNETES_EXECUTOR_POD_NAME_PREFIX, resourceNamePrefix) - // to set the config variables to allow client-mode spark-submit from driver - .set(KUBERNETES_DRIVER_SUBMIT_CHECK, true) - - driverSpec.copy( - driverPod = baseDriverPod, - driverSparkConf = resolvedSparkConf, - driverContainer = driverContainer) - } - -} - diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/submit/steps/DependencyResolutionStep.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/submit/steps/DependencyResolutionStep.scala deleted file mode 100644 index 43de329f239ad..0000000000000 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/submit/steps/DependencyResolutionStep.scala +++ /dev/null @@ -1,61 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.spark.deploy.k8s.submit.steps - -import java.io.File - -import io.fabric8.kubernetes.api.model.ContainerBuilder - -import org.apache.spark.deploy.k8s.Constants._ -import org.apache.spark.deploy.k8s.KubernetesUtils -import org.apache.spark.deploy.k8s.submit.KubernetesDriverSpec - -/** - * Step that configures the classpath, spark.jars, and spark.files for the driver given that the - * user may provide remote files or files with local:// schemes. 
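The deleted BasicDriverConfigurationStep above sizes the driver's memory request as memory plus overhead. A worked example, using the explicit-overhead values exercised by BasicDriverFeatureStepSuite later in this patch (the 0.1 factor and 384 MiB floor in the no-overhead branch are the assumed values of MEMORY_OVERHEAD_FACTOR and MEMORY_OVERHEAD_MIN_MIB):

```scala
// spark.driver.memory=256M with an explicit spark.driver.memoryOverhead=200:
val driverMemoryMiB = 256L
val overheadMiB = 200L
val requested = s"${driverMemoryMiB + overheadMiB}Mi"  // "456Mi", used as both request and limit

// Without an explicit overhead, the step falls back to the factor/floor rule:
val defaultOverheadMiB = math.max((0.1 * driverMemoryMiB).toInt, 384L)  // floor wins here: 384
```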
- */ -private[spark] class DependencyResolutionStep( - sparkJars: Seq[String], - sparkFiles: Seq[String]) extends DriverConfigurationStep { - - override def configureDriver(driverSpec: KubernetesDriverSpec): KubernetesDriverSpec = { - val resolvedSparkJars = KubernetesUtils.resolveFileUrisAndPath(sparkJars) - val resolvedSparkFiles = KubernetesUtils.resolveFileUrisAndPath(sparkFiles) - - val sparkConf = driverSpec.driverSparkConf.clone() - if (resolvedSparkJars.nonEmpty) { - sparkConf.set("spark.jars", resolvedSparkJars.mkString(",")) - } - if (resolvedSparkFiles.nonEmpty) { - sparkConf.set("spark.files", resolvedSparkFiles.mkString(",")) - } - val resolvedDriverContainer = if (resolvedSparkJars.nonEmpty) { - new ContainerBuilder(driverSpec.driverContainer) - .addNewEnv() - .withName(ENV_MOUNTED_CLASSPATH) - .withValue(resolvedSparkJars.mkString(File.pathSeparator)) - .endEnv() - .build() - } else { - driverSpec.driverContainer - } - - driverSpec.copy( - driverContainer = resolvedDriverContainer, - driverSparkConf = sparkConf) - } -} diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/submit/steps/DriverKubernetesCredentialsStep.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/submit/steps/DriverKubernetesCredentialsStep.scala deleted file mode 100644 index 2424e63999a82..0000000000000 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/submit/steps/DriverKubernetesCredentialsStep.scala +++ /dev/null @@ -1,245 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.spark.deploy.k8s.submit.steps - -import java.io.File -import java.nio.charset.StandardCharsets - -import scala.collection.JavaConverters._ -import scala.language.implicitConversions - -import com.google.common.io.{BaseEncoding, Files} -import io.fabric8.kubernetes.api.model.{ContainerBuilder, PodBuilder, Secret, SecretBuilder} - -import org.apache.spark.SparkConf -import org.apache.spark.deploy.k8s.Config._ -import org.apache.spark.deploy.k8s.Constants._ -import org.apache.spark.deploy.k8s.submit.KubernetesDriverSpec - -/** - * Mounts Kubernetes credentials into the driver pod. The driver will use such mounted credentials - * to request executors. 
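The dependency resolution step above rewrites `local://` URIs, which name files already baked into the container image, to bare container paths, while remote URIs pass through unchanged. A sketch of the assumed `KubernetesUtils.resolveFileUrisAndPath` semantics, matching the expectations in BasicDriverFeatureStepSuite below:

```scala
// Assumed resolution behavior; only the local:// scheme is stripped.
val jars = Seq("local:///opt/spark/jar1.jar", "hdfs:///opt/spark/jar2.jar")
val resolved = jars.map {
  case uri if uri.startsWith("local://") => uri.stripPrefix("local://")
  case uri => uri
}
// resolved == Seq("/opt/spark/jar1.jar", "hdfs:///opt/spark/jar2.jar")
```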
- */ -private[spark] class DriverKubernetesCredentialsStep( - submissionSparkConf: SparkConf, - kubernetesResourceNamePrefix: String) extends DriverConfigurationStep { - - private val maybeMountedOAuthTokenFile = submissionSparkConf.getOption( - s"$KUBERNETES_AUTH_DRIVER_MOUNTED_CONF_PREFIX.$OAUTH_TOKEN_FILE_CONF_SUFFIX") - private val maybeMountedClientKeyFile = submissionSparkConf.getOption( - s"$KUBERNETES_AUTH_DRIVER_MOUNTED_CONF_PREFIX.$CLIENT_KEY_FILE_CONF_SUFFIX") - private val maybeMountedClientCertFile = submissionSparkConf.getOption( - s"$KUBERNETES_AUTH_DRIVER_MOUNTED_CONF_PREFIX.$CLIENT_CERT_FILE_CONF_SUFFIX") - private val maybeMountedCaCertFile = submissionSparkConf.getOption( - s"$KUBERNETES_AUTH_DRIVER_MOUNTED_CONF_PREFIX.$CA_CERT_FILE_CONF_SUFFIX") - private val driverServiceAccount = submissionSparkConf.get(KUBERNETES_SERVICE_ACCOUNT_NAME) - - override def configureDriver(driverSpec: KubernetesDriverSpec): KubernetesDriverSpec = { - val driverSparkConf = driverSpec.driverSparkConf.clone() - - val oauthTokenBase64 = submissionSparkConf - .getOption(s"$KUBERNETES_AUTH_DRIVER_CONF_PREFIX.$OAUTH_TOKEN_CONF_SUFFIX") - .map { token => - BaseEncoding.base64().encode(token.getBytes(StandardCharsets.UTF_8)) - } - val caCertDataBase64 = safeFileConfToBase64( - s"$KUBERNETES_AUTH_DRIVER_CONF_PREFIX.$CA_CERT_FILE_CONF_SUFFIX", - "Driver CA cert file") - val clientKeyDataBase64 = safeFileConfToBase64( - s"$KUBERNETES_AUTH_DRIVER_CONF_PREFIX.$CLIENT_KEY_FILE_CONF_SUFFIX", - "Driver client key file") - val clientCertDataBase64 = safeFileConfToBase64( - s"$KUBERNETES_AUTH_DRIVER_CONF_PREFIX.$CLIENT_CERT_FILE_CONF_SUFFIX", - "Driver client cert file") - - val driverSparkConfWithCredentialsLocations = setDriverPodKubernetesCredentialLocations( - driverSparkConf, - oauthTokenBase64, - caCertDataBase64, - clientKeyDataBase64, - clientCertDataBase64) - - val kubernetesCredentialsSecret = createCredentialsSecret( - oauthTokenBase64, - caCertDataBase64, - clientKeyDataBase64, - clientCertDataBase64) - - val driverPodWithMountedKubernetesCredentials = kubernetesCredentialsSecret.map { secret => - new PodBuilder(driverSpec.driverPod) - .editOrNewSpec() - .addNewVolume() - .withName(DRIVER_CREDENTIALS_SECRET_VOLUME_NAME) - .withNewSecret().withSecretName(secret.getMetadata.getName).endSecret() - .endVolume() - .endSpec() - .build() - }.getOrElse( - driverServiceAccount.map { account => - new PodBuilder(driverSpec.driverPod) - .editOrNewSpec() - .withServiceAccount(account) - .withServiceAccountName(account) - .endSpec() - .build() - }.getOrElse(driverSpec.driverPod) - ) - - val driverContainerWithMountedSecretVolume = kubernetesCredentialsSecret.map { _ => - new ContainerBuilder(driverSpec.driverContainer) - .addNewVolumeMount() - .withName(DRIVER_CREDENTIALS_SECRET_VOLUME_NAME) - .withMountPath(DRIVER_CREDENTIALS_SECRETS_BASE_DIR) - .endVolumeMount() - .build() - }.getOrElse(driverSpec.driverContainer) - - driverSpec.copy( - driverPod = driverPodWithMountedKubernetesCredentials, - otherKubernetesResources = - driverSpec.otherKubernetesResources ++ kubernetesCredentialsSecret.toSeq, - driverSparkConf = driverSparkConfWithCredentialsLocations, - driverContainer = driverContainerWithMountedSecretVolume) - } - - private def createCredentialsSecret( - driverOAuthTokenBase64: Option[String], - driverCaCertDataBase64: Option[String], - driverClientKeyDataBase64: Option[String], - driverClientCertDataBase64: Option[String]): Option[Secret] = { - val allSecretData = - resolveSecretData( - 
driverClientKeyDataBase64, - DRIVER_CREDENTIALS_CLIENT_KEY_SECRET_NAME) ++ - resolveSecretData( - driverClientCertDataBase64, - DRIVER_CREDENTIALS_CLIENT_CERT_SECRET_NAME) ++ - resolveSecretData( - driverCaCertDataBase64, - DRIVER_CREDENTIALS_CA_CERT_SECRET_NAME) ++ - resolveSecretData( - driverOAuthTokenBase64, - DRIVER_CREDENTIALS_OAUTH_TOKEN_SECRET_NAME) - - if (allSecretData.isEmpty) { - None - } else { - Some(new SecretBuilder() - .withNewMetadata() - .withName(s"$kubernetesResourceNamePrefix-kubernetes-credentials") - .endMetadata() - .withData(allSecretData.asJava) - .build()) - } - } - - private def setDriverPodKubernetesCredentialLocations( - driverSparkConf: SparkConf, - driverOauthTokenBase64: Option[String], - driverCaCertDataBase64: Option[String], - driverClientKeyDataBase64: Option[String], - driverClientCertDataBase64: Option[String]): SparkConf = { - val resolvedMountedOAuthTokenFile = resolveSecretLocation( - maybeMountedOAuthTokenFile, - driverOauthTokenBase64, - DRIVER_CREDENTIALS_OAUTH_TOKEN_PATH) - val resolvedMountedClientKeyFile = resolveSecretLocation( - maybeMountedClientKeyFile, - driverClientKeyDataBase64, - DRIVER_CREDENTIALS_CLIENT_KEY_PATH) - val resolvedMountedClientCertFile = resolveSecretLocation( - maybeMountedClientCertFile, - driverClientCertDataBase64, - DRIVER_CREDENTIALS_CLIENT_CERT_PATH) - val resolvedMountedCaCertFile = resolveSecretLocation( - maybeMountedCaCertFile, - driverCaCertDataBase64, - DRIVER_CREDENTIALS_CA_CERT_PATH) - - val sparkConfWithCredentialLocations = driverSparkConf - .setOption( - s"$KUBERNETES_AUTH_DRIVER_MOUNTED_CONF_PREFIX.$CA_CERT_FILE_CONF_SUFFIX", - resolvedMountedCaCertFile) - .setOption( - s"$KUBERNETES_AUTH_DRIVER_MOUNTED_CONF_PREFIX.$CLIENT_KEY_FILE_CONF_SUFFIX", - resolvedMountedClientKeyFile) - .setOption( - s"$KUBERNETES_AUTH_DRIVER_MOUNTED_CONF_PREFIX.$CLIENT_CERT_FILE_CONF_SUFFIX", - resolvedMountedClientCertFile) - .setOption( - s"$KUBERNETES_AUTH_DRIVER_MOUNTED_CONF_PREFIX.$OAUTH_TOKEN_FILE_CONF_SUFFIX", - resolvedMountedOAuthTokenFile) - - // Redact all OAuth token values - sparkConfWithCredentialLocations - .getAll - .filter(_._1.endsWith(OAUTH_TOKEN_CONF_SUFFIX)).map(_._1) - .foreach { - sparkConfWithCredentialLocations.set(_, "") - } - sparkConfWithCredentialLocations - } - - private def safeFileConfToBase64(conf: String, fileType: String): Option[String] = { - submissionSparkConf.getOption(conf) - .map(new File(_)) - .map { file => - require(file.isFile, String.format("%s provided at %s does not exist or is not a file.", - fileType, file.getAbsolutePath)) - BaseEncoding.base64().encode(Files.toByteArray(file)) - } - } - - private def resolveSecretLocation( - mountedUserSpecified: Option[String], - valueMountedFromSubmitter: Option[String], - mountedCanonicalLocation: String): Option[String] = { - mountedUserSpecified.orElse(valueMountedFromSubmitter.map { _ => - mountedCanonicalLocation - }) - } - - /** - * Resolve a Kubernetes secret data entry from an optional client credential used by the - * driver to talk to the Kubernetes API server. - * - * @param userSpecifiedCredential the optional user-specified client credential. - * @param secretName name of the Kubernetes secret storing the client credential. - * @return a secret data entry in the form of a map from the secret name to the secret data, - * which may be empty if the user-specified credential is empty. 
- */ - private def resolveSecretData( - userSpecifiedCredential: Option[String], - secretName: String): Map[String, String] = { - userSpecifiedCredential.map { valueBase64 => - Map(secretName -> valueBase64) - }.getOrElse(Map.empty[String, String]) - } - - private implicit def augmentSparkConf(sparkConf: SparkConf): OptionSettableSparkConf = { - new OptionSettableSparkConf(sparkConf) - } -} - -private class OptionSettableSparkConf(sparkConf: SparkConf) { - def setOption(configEntry: String, option: Option[String]): SparkConf = { - option.foreach { opt => - sparkConf.set(configEntry, opt) - } - sparkConf - } -} diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/submit/steps/DriverServiceBootstrapStep.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/submit/steps/DriverServiceBootstrapStep.scala deleted file mode 100644 index 34af7cde6c1a9..0000000000000 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/submit/steps/DriverServiceBootstrapStep.scala +++ /dev/null @@ -1,104 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.spark.deploy.k8s.submit.steps - -import scala.collection.JavaConverters._ - -import io.fabric8.kubernetes.api.model.ServiceBuilder - -import org.apache.spark.SparkConf -import org.apache.spark.deploy.k8s.Config._ -import org.apache.spark.deploy.k8s.Constants._ -import org.apache.spark.deploy.k8s.submit.KubernetesDriverSpec -import org.apache.spark.internal.Logging -import org.apache.spark.util.Clock - -/** - * Allows the driver to be reachable by executor pods through a headless service. The service's - * ports should correspond to the ports that the executor will reach the pod at for RPC. 
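The credentials step above resolves each credential in two layers: a path the user has already mounted wins outright; otherwise credential data read on the submitter side is shipped in a secret and referenced at its canonical mount path. Condensed from `resolveSecretLocation`:

```scala
// Sketch of the precedence rule, not verbatim source.
def resolveSecretLocation(
    mountedUserSpecified: Option[String],
    valueFromSubmitter: Option[String],
    canonicalPath: String): Option[String] =
  mountedUserSpecified.orElse(valueFromSubmitter.map(_ => canonicalPath))

resolveSecretLocation(Some("/mnt/secrets/my-key.pem"), Some("keyDataBase64"), "/canonical/key.pem")
// -> Some("/mnt/secrets/my-key.pem")   (pre-mounted file wins)
resolveSecretLocation(None, Some("keyDataBase64"), "/canonical/key.pem")
// -> Some("/canonical/key.pem")        (uploaded data served from the secret volume)
```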
- */ -private[spark] class DriverServiceBootstrapStep( - resourceNamePrefix: String, - driverLabels: Map[String, String], - sparkConf: SparkConf, - clock: Clock) extends DriverConfigurationStep with Logging { - - import DriverServiceBootstrapStep._ - - override def configureDriver(driverSpec: KubernetesDriverSpec): KubernetesDriverSpec = { - require(sparkConf.getOption(DRIVER_BIND_ADDRESS_KEY).isEmpty, - s"$DRIVER_BIND_ADDRESS_KEY is not supported in Kubernetes mode, as the driver's bind " + - "address is managed and set to the driver pod's IP address.") - require(sparkConf.getOption(DRIVER_HOST_KEY).isEmpty, - s"$DRIVER_HOST_KEY is not supported in Kubernetes mode, as the driver's hostname will be " + - "managed via a Kubernetes service.") - - val preferredServiceName = s"$resourceNamePrefix$DRIVER_SVC_POSTFIX" - val resolvedServiceName = if (preferredServiceName.length <= MAX_SERVICE_NAME_LENGTH) { - preferredServiceName - } else { - val randomServiceId = clock.getTimeMillis() - val shorterServiceName = s"spark-$randomServiceId$DRIVER_SVC_POSTFIX" - logWarning(s"Driver's hostname would preferably be $preferredServiceName, but this is " + - s"too long (must be <= $MAX_SERVICE_NAME_LENGTH characters). Falling back to use " + - s"$shorterServiceName as the driver service's name.") - shorterServiceName - } - - val driverPort = sparkConf.getInt("spark.driver.port", DEFAULT_DRIVER_PORT) - val driverBlockManagerPort = sparkConf.getInt( - org.apache.spark.internal.config.DRIVER_BLOCK_MANAGER_PORT.key, DEFAULT_BLOCKMANAGER_PORT) - val driverService = new ServiceBuilder() - .withNewMetadata() - .withName(resolvedServiceName) - .endMetadata() - .withNewSpec() - .withClusterIP("None") - .withSelector(driverLabels.asJava) - .addNewPort() - .withName(DRIVER_PORT_NAME) - .withPort(driverPort) - .withNewTargetPort(driverPort) - .endPort() - .addNewPort() - .withName(BLOCK_MANAGER_PORT_NAME) - .withPort(driverBlockManagerPort) - .withNewTargetPort(driverBlockManagerPort) - .endPort() - .endSpec() - .build() - - val namespace = sparkConf.get(KUBERNETES_NAMESPACE) - val driverHostname = s"${driverService.getMetadata.getName}.$namespace.svc" - val resolvedSparkConf = driverSpec.driverSparkConf.clone() - .set(DRIVER_HOST_KEY, driverHostname) - .set("spark.driver.port", driverPort.toString) - .set( - org.apache.spark.internal.config.DRIVER_BLOCK_MANAGER_PORT, driverBlockManagerPort) - - driverSpec.copy( - driverSparkConf = resolvedSparkConf, - otherKubernetesResources = driverSpec.otherKubernetesResources ++ Seq(driverService)) - } -} - -private[spark] object DriverServiceBootstrapStep { - val DRIVER_BIND_ADDRESS_KEY = org.apache.spark.internal.config.DRIVER_BIND_ADDRESS.key - val DRIVER_HOST_KEY = org.apache.spark.internal.config.DRIVER_HOST_ADDRESS.key - val DRIVER_SVC_POSTFIX = "-driver-svc" - val MAX_SERVICE_NAME_LENGTH = 63 -} diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodFactory.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodFactory.scala deleted file mode 100644 index 8607d6fba3234..0000000000000 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodFactory.scala +++ /dev/null @@ -1,227 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. 
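The 63-character cap in the service bootstrap step above comes from the DNS-1035 label limit on Kubernetes object names. When the preferred service name overflows, a timestamped fallback keeps the name both short and unique; a sketch, reusing the step's `resourceNamePrefix` and `clock`:

```scala
// Sketch of the fallback rule; the timestamp value is illustrative only.
val preferred = s"$resourceNamePrefix-driver-svc"
val resolved =
  if (preferred.length <= 63) preferred
  else s"spark-${clock.getTimeMillis()}-driver-svc"  // e.g. spark-1523304265000-driver-svc
```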
- * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.spark.scheduler.cluster.k8s - -import scala.collection.JavaConverters._ - -import io.fabric8.kubernetes.api.model._ - -import org.apache.spark.{SparkConf, SparkException} -import org.apache.spark.deploy.k8s.{KubernetesUtils, MountSecretsBootstrap} -import org.apache.spark.deploy.k8s.Config._ -import org.apache.spark.deploy.k8s.Constants._ -import org.apache.spark.internal.config.{EXECUTOR_CLASS_PATH, EXECUTOR_JAVA_OPTIONS, EXECUTOR_MEMORY, EXECUTOR_MEMORY_OVERHEAD} -import org.apache.spark.util.Utils - -/** - * A factory class for bootstrapping and creating executor pods with the given bootstrapping - * components. - * - * @param sparkConf Spark configuration - * @param mountSecretsBootstrap an optional component for mounting user-specified secrets onto - * user-specified paths into the executor container - */ -private[spark] class ExecutorPodFactory( - sparkConf: SparkConf, - mountSecretsBootstrap: Option[MountSecretsBootstrap]) { - - private val executorExtraClasspath = sparkConf.get(EXECUTOR_CLASS_PATH) - - private val executorLabels = KubernetesUtils.parsePrefixedKeyValuePairs( - sparkConf, - KUBERNETES_EXECUTOR_LABEL_PREFIX) - require( - !executorLabels.contains(SPARK_APP_ID_LABEL), - s"Custom executor labels cannot contain $SPARK_APP_ID_LABEL as it is reserved for Spark.") - require( - !executorLabels.contains(SPARK_EXECUTOR_ID_LABEL), - s"Custom executor labels cannot contain $SPARK_EXECUTOR_ID_LABEL as it is reserved for" + - " Spark.") - require( - !executorLabels.contains(SPARK_ROLE_LABEL), - s"Custom executor labels cannot contain $SPARK_ROLE_LABEL as it is reserved for Spark.") - - private val executorAnnotations = - KubernetesUtils.parsePrefixedKeyValuePairs( - sparkConf, - KUBERNETES_EXECUTOR_ANNOTATION_PREFIX) - private val nodeSelector = - KubernetesUtils.parsePrefixedKeyValuePairs( - sparkConf, - KUBERNETES_NODE_SELECTOR_PREFIX) - - private val executorContainerImage = sparkConf - .get(EXECUTOR_CONTAINER_IMAGE) - .getOrElse(throw new SparkException("Must specify the executor container image")) - private val imagePullPolicy = sparkConf.get(CONTAINER_IMAGE_PULL_POLICY) - private val imagePullSecrets = sparkConf.get(IMAGE_PULL_SECRETS) - private val blockManagerPort = sparkConf - .getInt("spark.blockmanager.port", DEFAULT_BLOCKMANAGER_PORT) - - private val executorPodNamePrefix = sparkConf.get(KUBERNETES_EXECUTOR_POD_NAME_PREFIX) - - private val executorMemoryMiB = sparkConf.get(EXECUTOR_MEMORY) - private val executorMemoryString = sparkConf.get( - EXECUTOR_MEMORY.key, EXECUTOR_MEMORY.defaultValueString) - - private val memoryOverheadMiB = sparkConf - .get(EXECUTOR_MEMORY_OVERHEAD) - .getOrElse(math.max((MEMORY_OVERHEAD_FACTOR * executorMemoryMiB).toInt, - MEMORY_OVERHEAD_MIN_MIB)) - private val executorMemoryWithOverhead = executorMemoryMiB + memoryOverheadMiB - - private val executorCores = sparkConf.getInt("spark.executor.cores", 1) - private val executorCoresRequest = if 
(sparkConf.contains(KUBERNETES_EXECUTOR_REQUEST_CORES)) { - sparkConf.get(KUBERNETES_EXECUTOR_REQUEST_CORES).get - } else { - executorCores.toString - } - private val executorLimitCores = sparkConf.get(KUBERNETES_EXECUTOR_LIMIT_CORES) - - /** - * Configure and construct an executor pod with the given parameters. - */ - def createExecutorPod( - executorId: String, - applicationId: String, - driverUrl: String, - executorEnvs: Seq[(String, String)], - driverPod: Pod, - nodeToLocalTaskCount: Map[String, Int]): Pod = { - val name = s"$executorPodNamePrefix-exec-$executorId" - - val parsedImagePullSecrets = KubernetesUtils.parseImagePullSecrets(imagePullSecrets) - - // hostname must be no longer than 63 characters, so take the last 63 characters of the pod - // name as the hostname. This preserves uniqueness since the end of name contains - // executorId - val hostname = name.substring(Math.max(0, name.length - 63)) - val resolvedExecutorLabels = Map( - SPARK_EXECUTOR_ID_LABEL -> executorId, - SPARK_APP_ID_LABEL -> applicationId, - SPARK_ROLE_LABEL -> SPARK_POD_EXECUTOR_ROLE) ++ - executorLabels - val executorMemoryQuantity = new QuantityBuilder(false) - .withAmount(s"${executorMemoryWithOverhead}Mi") - .build() - val executorCpuQuantity = new QuantityBuilder(false) - .withAmount(executorCoresRequest) - .build() - val executorExtraClasspathEnv = executorExtraClasspath.map { cp => - new EnvVarBuilder() - .withName(ENV_CLASSPATH) - .withValue(cp) - .build() - } - val executorExtraJavaOptionsEnv = sparkConf - .get(EXECUTOR_JAVA_OPTIONS) - .map { opts => - val delimitedOpts = Utils.splitCommandString(opts) - delimitedOpts.zipWithIndex.map { - case (opt, index) => - new EnvVarBuilder().withName(s"$ENV_JAVA_OPT_PREFIX$index").withValue(opt).build() - } - }.getOrElse(Seq.empty[EnvVar]) - val executorEnv = (Seq( - (ENV_DRIVER_URL, driverUrl), - (ENV_EXECUTOR_CORES, executorCores.toString), - (ENV_EXECUTOR_MEMORY, executorMemoryString), - (ENV_APPLICATION_ID, applicationId), - // This is to set the SPARK_CONF_DIR to be /opt/spark/conf - (ENV_SPARK_CONF_DIR, SPARK_CONF_DIR_INTERNAL), - (ENV_EXECUTOR_ID, executorId)) ++ executorEnvs) - .map(env => new EnvVarBuilder() - .withName(env._1) - .withValue(env._2) - .build() - ) ++ Seq( - new EnvVarBuilder() - .withName(ENV_EXECUTOR_POD_IP) - .withValueFrom(new EnvVarSourceBuilder() - .withNewFieldRef("v1", "status.podIP") - .build()) - .build() - ) ++ executorExtraJavaOptionsEnv ++ executorExtraClasspathEnv.toSeq - val requiredPorts = Seq( - (BLOCK_MANAGER_PORT_NAME, blockManagerPort)) - .map { case (name, port) => - new ContainerPortBuilder() - .withName(name) - .withContainerPort(port) - .build() - } - - val executorContainer = new ContainerBuilder() - .withName("executor") - .withImage(executorContainerImage) - .withImagePullPolicy(imagePullPolicy) - .withNewResources() - .addToRequests("memory", executorMemoryQuantity) - .addToLimits("memory", executorMemoryQuantity) - .addToRequests("cpu", executorCpuQuantity) - .endResources() - .addAllToEnv(executorEnv.asJava) - .withPorts(requiredPorts.asJava) - .addToArgs("executor") - .build() - - val executorPod = new PodBuilder() - .withNewMetadata() - .withName(name) - .withLabels(resolvedExecutorLabels.asJava) - .withAnnotations(executorAnnotations.asJava) - .withOwnerReferences() - .addNewOwnerReference() - .withController(true) - .withApiVersion(driverPod.getApiVersion) - .withKind(driverPod.getKind) - .withName(driverPod.getMetadata.getName) - .withUid(driverPod.getMetadata.getUid) - .endOwnerReference() - 
.endMetadata() - .withNewSpec() - .withHostname(hostname) - .withRestartPolicy("Never") - .withNodeSelector(nodeSelector.asJava) - .withImagePullSecrets(parsedImagePullSecrets.asJava) - .endSpec() - .build() - - val containerWithLimitCores = executorLimitCores.map { limitCores => - val executorCpuLimitQuantity = new QuantityBuilder(false) - .withAmount(limitCores) - .build() - new ContainerBuilder(executorContainer) - .editResources() - .addToLimits("cpu", executorCpuLimitQuantity) - .endResources() - .build() - }.getOrElse(executorContainer) - - val (maybeSecretsMountedPod, maybeSecretsMountedContainer) = - mountSecretsBootstrap.map { bootstrap => - (bootstrap.addSecretVolumes(executorPod), bootstrap.mountSecrets(containerWithLimitCores)) - }.getOrElse((executorPod, containerWithLimitCores)) - - - new PodBuilder(maybeSecretsMountedPod) - .editSpec() - .addToContainers(maybeSecretsMountedContainer) - .endSpec() - .build() - } -} diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/KubernetesClusterManager.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/KubernetesClusterManager.scala index ff5f6801da2a3..0ea80dfbc0d97 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/KubernetesClusterManager.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/KubernetesClusterManager.scala @@ -21,7 +21,7 @@ import java.io.File import io.fabric8.kubernetes.client.Config import org.apache.spark.{SparkContext, SparkException} -import org.apache.spark.deploy.k8s.{KubernetesUtils, MountSecretsBootstrap, SparkKubernetesClientFactory} +import org.apache.spark.deploy.k8s.{KubernetesUtils, SparkKubernetesClientFactory} import org.apache.spark.deploy.k8s.Config._ import org.apache.spark.deploy.k8s.Constants._ import org.apache.spark.internal.Logging @@ -48,12 +48,6 @@ private[spark] class KubernetesClusterManager extends ExternalClusterManager wit scheduler: TaskScheduler): SchedulerBackend = { val executorSecretNamesToMountPaths = KubernetesUtils.parsePrefixedKeyValuePairs( sc.conf, KUBERNETES_EXECUTOR_SECRETS_PREFIX) - val mountSecretBootstrap = if (executorSecretNamesToMountPaths.nonEmpty) { - Some(new MountSecretsBootstrap(executorSecretNamesToMountPaths)) - } else { - None - } - val kubernetesClient = SparkKubernetesClientFactory.createKubernetesClient( KUBERNETES_MASTER_INTERNAL_URL, Some(sc.conf.get(KUBERNETES_NAMESPACE)), @@ -62,8 +56,6 @@ private[spark] class KubernetesClusterManager extends ExternalClusterManager wit Some(new File(Config.KUBERNETES_SERVICE_ACCOUNT_TOKEN_PATH)), Some(new File(Config.KUBERNETES_SERVICE_ACCOUNT_CA_CRT_PATH))) - val executorPodFactory = new ExecutorPodFactory(sc.conf, mountSecretBootstrap) - val allocatorExecutor = ThreadUtils .newDaemonSingleThreadScheduledExecutor("kubernetes-pod-allocator") val requestExecutorsService = ThreadUtils.newDaemonCachedThreadPool( @@ -71,7 +63,7 @@ private[spark] class KubernetesClusterManager extends ExternalClusterManager wit new KubernetesClusterSchedulerBackend( scheduler.asInstanceOf[TaskSchedulerImpl], sc.env.rpcEnv, - executorPodFactory, + new KubernetesExecutorBuilder, kubernetesClient, allocatorExecutor, requestExecutorsService) diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/KubernetesClusterSchedulerBackend.scala 
b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/KubernetesClusterSchedulerBackend.scala index 9de4b16c30d3c..d86664c81071b 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/KubernetesClusterSchedulerBackend.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/KubernetesClusterSchedulerBackend.scala @@ -32,6 +32,7 @@ import scala.concurrent.{ExecutionContext, Future} import org.apache.spark.SparkException import org.apache.spark.deploy.k8s.Config._ import org.apache.spark.deploy.k8s.Constants._ +import org.apache.spark.deploy.k8s.KubernetesConf import org.apache.spark.rpc.{RpcAddress, RpcEndpointAddress, RpcEnv} import org.apache.spark.scheduler.{ExecutorExited, SlaveLost, TaskSchedulerImpl} import org.apache.spark.scheduler.cluster.{CoarseGrainedSchedulerBackend, SchedulerBackendUtils} @@ -40,7 +41,7 @@ import org.apache.spark.util.Utils private[spark] class KubernetesClusterSchedulerBackend( scheduler: TaskSchedulerImpl, rpcEnv: RpcEnv, - executorPodFactory: ExecutorPodFactory, + executorBuilder: KubernetesExecutorBuilder, kubernetesClient: KubernetesClient, allocatorExecutor: ScheduledExecutorService, requestExecutorsService: ExecutorService) @@ -115,14 +116,19 @@ private[spark] class KubernetesClusterSchedulerBackend( for (_ <- 0 until math.min( currentTotalExpectedExecutors - runningExecutorsToPods.size, podAllocationSize)) { val executorId = EXECUTOR_ID_COUNTER.incrementAndGet().toString - val executorPod = executorPodFactory.createExecutorPod( + val executorConf = KubernetesConf.createExecutorConf( + conf, executorId, applicationId(), - driverUrl, - conf.getExecutorEnv, - driverPod, - currentNodeToLocalTaskCount) - executorsToAllocate(executorId) = executorPod + driverPod) + val executorPod = executorBuilder.buildFromFeatures(executorConf) + val podWithAttachedContainer = new PodBuilder(executorPod.pod) + .editOrNewSpec() + .addToContainers(executorPod.container) + .endSpec() + .build() + + executorsToAllocate(executorId) = podWithAttachedContainer logInfo( s"Requesting a new executor, total executors is now ${runningExecutorsToPods.size}") } diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/KubernetesExecutorBuilder.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/KubernetesExecutorBuilder.scala new file mode 100644 index 0000000000000..22568fe7ea3be --- /dev/null +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/KubernetesExecutorBuilder.scala @@ -0,0 +1,41 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
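The scheduler backend hunk above replaces the monolithic pod factory with the builder-plus-attach pattern: the builder produces a `SparkPod` (pod template plus unattached container), and the backend attaches the container as the final step. A condensed view of the new allocation path, not verbatim source:

```scala
// `conf`, `executorId`, `applicationId()`, and `driverPod` come from the
// backend's scope as in the hunk above.
val executorConf = KubernetesConf.createExecutorConf(conf, executorId, applicationId(), driverPod)
val sparkPod = executorBuilder.buildFromFeatures(executorConf)
val runnablePod = new PodBuilder(sparkPod.pod)
  .editOrNewSpec()
  .addToContainers(sparkPod.container)
  .endSpec()
  .build()
```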
+ */ +package org.apache.spark.scheduler.cluster.k8s + +import org.apache.spark.deploy.k8s.{KubernetesConf, KubernetesExecutorSpecificConf, KubernetesRoleSpecificConf, SparkPod} +import org.apache.spark.deploy.k8s.features.{BasicExecutorFeatureStep, MountSecretsFeatureStep} + +private[spark] class KubernetesExecutorBuilder( + provideBasicStep: (KubernetesConf[KubernetesExecutorSpecificConf]) => BasicExecutorFeatureStep = + new BasicExecutorFeatureStep(_), + provideSecretsStep: + (KubernetesConf[_ <: KubernetesRoleSpecificConf]) => MountSecretsFeatureStep = + new MountSecretsFeatureStep(_)) { + + def buildFromFeatures( + kubernetesConf: KubernetesConf[KubernetesExecutorSpecificConf]): SparkPod = { + val baseFeatures = Seq(provideBasicStep(kubernetesConf)) + val allFeatures = if (kubernetesConf.roleSecretNamesToMountPaths.nonEmpty) { + baseFeatures ++ Seq(provideSecretsStep(kubernetesConf)) + } else baseFeatures + var executorPod = SparkPod.initialPod() + for (feature <- allFeatures) { + executorPod = feature.configurePod(executorPod) + } + executorPod + } +} diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/KubernetesConfSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/KubernetesConfSuite.scala new file mode 100644 index 0000000000000..f10202f7a3546 --- /dev/null +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/KubernetesConfSuite.scala @@ -0,0 +1,175 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
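KubernetesConfSuite below pins down the conf-translation rules. The main-resource handling it asserts reduces to appending the primary app jar, when present, to the user's `spark.jars`; a sketch of the assumed implementation shape:

```scala
// Behavior asserted by the "with and without the main app jar" test below.
val userJars   = Seq("local:///opt/spark/jar1.jar")
val mainAppJar = Option("local:///opt/spark/main.jar")
val sparkJars  = userJars ++ mainAppJar
// sparkJars.mkString(",") == "local:///opt/spark/jar1.jar,local:///opt/spark/main.jar"
```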
+ */ + +package org.apache.spark.deploy.k8s + +import io.fabric8.kubernetes.api.model.{LocalObjectReferenceBuilder, PodBuilder} + +import org.apache.spark.{SparkConf, SparkFunSuite} +import org.apache.spark.deploy.k8s.Config._ +import org.apache.spark.deploy.k8s.Constants._ +import org.apache.spark.deploy.k8s.submit.JavaMainAppResource + +class KubernetesConfSuite extends SparkFunSuite { + + private val APP_NAME = "test-app" + private val RESOURCE_NAME_PREFIX = "prefix" + private val APP_ID = "test-id" + private val MAIN_CLASS = "test-class" + private val APP_ARGS = Array("arg1", "arg2") + private val CUSTOM_LABELS = Map( + "customLabel1Key" -> "customLabel1Value", + "customLabel2Key" -> "customLabel2Value") + private val CUSTOM_ANNOTATIONS = Map( + "customAnnotation1Key" -> "customAnnotation1Value", + "customAnnotation2Key" -> "customAnnotation2Value") + private val SECRET_NAMES_TO_MOUNT_PATHS = Map( + "secret1" -> "/mnt/secrets/secret1", + "secret2" -> "/mnt/secrets/secret2") + private val CUSTOM_ENVS = Map( + "customEnvKey1" -> "customEnvValue1", + "customEnvKey2" -> "customEnvValue2") + private val DRIVER_POD = new PodBuilder().build() + private val EXECUTOR_ID = "executor-id" + + test("Basic driver translated fields.") { + val sparkConf = new SparkConf(false) + val conf = KubernetesConf.createDriverConf( + sparkConf, + APP_NAME, + RESOURCE_NAME_PREFIX, + APP_ID, + None, + MAIN_CLASS, + APP_ARGS) + assert(conf.appId === APP_ID) + assert(conf.sparkConf.getAll.toMap === sparkConf.getAll.toMap) + assert(conf.appResourceNamePrefix === RESOURCE_NAME_PREFIX) + assert(conf.roleSpecificConf.appName === APP_NAME) + assert(conf.roleSpecificConf.mainAppResource.isEmpty) + assert(conf.roleSpecificConf.mainClass === MAIN_CLASS) + assert(conf.roleSpecificConf.appArgs === APP_ARGS) + } + + test("Creating driver conf with and without the main app jar influences spark.jars") { + val sparkConf = new SparkConf(false) + .setJars(Seq("local:///opt/spark/jar1.jar")) + val mainAppJar = Some(JavaMainAppResource("local:///opt/spark/main.jar")) + val kubernetesConfWithMainJar = KubernetesConf.createDriverConf( + sparkConf, + APP_NAME, + RESOURCE_NAME_PREFIX, + APP_ID, + mainAppJar, + MAIN_CLASS, + APP_ARGS) + assert(kubernetesConfWithMainJar.sparkConf.get("spark.jars") + .split(",") + === Array("local:///opt/spark/jar1.jar", "local:///opt/spark/main.jar")) + val kubernetesConfWithoutMainJar = KubernetesConf.createDriverConf( + sparkConf, + APP_NAME, + RESOURCE_NAME_PREFIX, + APP_ID, + None, + MAIN_CLASS, + APP_ARGS) + assert(kubernetesConfWithoutMainJar.sparkConf.get("spark.jars").split(",") + === Array("local:///opt/spark/jar1.jar")) + } + + test("Resolve driver labels, annotations, secret mount paths, and envs.") { + val sparkConf = new SparkConf(false) + CUSTOM_LABELS.foreach { case (key, value) => + sparkConf.set(s"$KUBERNETES_DRIVER_LABEL_PREFIX$key", value) + } + CUSTOM_ANNOTATIONS.foreach { case (key, value) => + sparkConf.set(s"$KUBERNETES_DRIVER_ANNOTATION_PREFIX$key", value) + } + SECRET_NAMES_TO_MOUNT_PATHS.foreach { case (key, value) => + sparkConf.set(s"$KUBERNETES_DRIVER_SECRETS_PREFIX$key", value) + } + CUSTOM_ENVS.foreach { case (key, value) => + sparkConf.set(s"$KUBERNETES_DRIVER_ENV_PREFIX$key", value) + } + + val conf = KubernetesConf.createDriverConf( + sparkConf, + APP_NAME, + RESOURCE_NAME_PREFIX, + APP_ID, + None, + MAIN_CLASS, + APP_ARGS) + assert(conf.roleLabels === Map( + SPARK_APP_ID_LABEL -> APP_ID, + SPARK_ROLE_LABEL -> SPARK_POD_DRIVER_ROLE) ++ + CUSTOM_LABELS) + 
assert(conf.roleAnnotations === CUSTOM_ANNOTATIONS) + assert(conf.roleSecretNamesToMountPaths === SECRET_NAMES_TO_MOUNT_PATHS) + assert(conf.roleEnvs === CUSTOM_ENVS) + } + + test("Basic executor translated fields.") { + val conf = KubernetesConf.createExecutorConf( + new SparkConf(false), + EXECUTOR_ID, + APP_ID, + DRIVER_POD) + assert(conf.roleSpecificConf.executorId === EXECUTOR_ID) + assert(conf.roleSpecificConf.driverPod === DRIVER_POD) + } + + test("Image pull secrets.") { + val conf = KubernetesConf.createExecutorConf( + new SparkConf(false) + .set(IMAGE_PULL_SECRETS, "my-secret-1,my-secret-2 "), + EXECUTOR_ID, + APP_ID, + DRIVER_POD) + assert(conf.imagePullSecrets() === + Seq( + new LocalObjectReferenceBuilder().withName("my-secret-1").build(), + new LocalObjectReferenceBuilder().withName("my-secret-2").build())) + } + + test("Set executor labels, annotations, and secrets") { + val sparkConf = new SparkConf(false) + CUSTOM_LABELS.foreach { case (key, value) => + sparkConf.set(s"$KUBERNETES_EXECUTOR_LABEL_PREFIX$key", value) + } + CUSTOM_ANNOTATIONS.foreach { case (key, value) => + sparkConf.set(s"$KUBERNETES_EXECUTOR_ANNOTATION_PREFIX$key", value) + } + SECRET_NAMES_TO_MOUNT_PATHS.foreach { case (key, value) => + sparkConf.set(s"$KUBERNETES_EXECUTOR_SECRETS_PREFIX$key", value) + } + + val conf = KubernetesConf.createExecutorConf( + sparkConf, + EXECUTOR_ID, + APP_ID, + DRIVER_POD) + assert(conf.roleLabels === Map( + SPARK_EXECUTOR_ID_LABEL -> EXECUTOR_ID, + SPARK_APP_ID_LABEL -> APP_ID, + SPARK_ROLE_LABEL -> SPARK_POD_EXECUTOR_ROLE) ++ CUSTOM_LABELS) + assert(conf.roleAnnotations === CUSTOM_ANNOTATIONS) + assert(conf.roleSecretNamesToMountPaths === SECRET_NAMES_TO_MOUNT_PATHS) + } + +} diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/BasicDriverFeatureStepSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/BasicDriverFeatureStepSuite.scala new file mode 100644 index 0000000000000..eee85b8baa730 --- /dev/null +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/BasicDriverFeatureStepSuite.scala @@ -0,0 +1,153 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
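The image-pull-secrets test above feeds a string with a trailing space and expects clean references back, so the parsing is assumed to split on commas and trim each entry before wrapping it in a fabric8 `LocalObjectReference`:

```scala
import io.fabric8.kubernetes.api.model.LocalObjectReferenceBuilder

// Assumed shape of imagePullSecrets() parsing, matching the test above.
val refs = "my-secret-1,my-secret-2 ".split(",").map(_.trim).filter(_.nonEmpty).map { name =>
  new LocalObjectReferenceBuilder().withName(name).build()
}
// refs.map(_.getName) sameElements Array("my-secret-1", "my-secret-2")
```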
+ */ +package org.apache.spark.deploy.k8s.features + +import scala.collection.JavaConverters._ + +import io.fabric8.kubernetes.api.model.LocalObjectReferenceBuilder + +import org.apache.spark.{SparkConf, SparkFunSuite} +import org.apache.spark.deploy.k8s.{KubernetesConf, KubernetesDriverSpecificConf, SparkPod} +import org.apache.spark.deploy.k8s.Config._ +import org.apache.spark.deploy.k8s.Constants._ + +class BasicDriverFeatureStepSuite extends SparkFunSuite { + + private val APP_ID = "spark-app-id" + private val RESOURCE_NAME_PREFIX = "spark" + private val DRIVER_LABELS = Map("labelkey" -> "labelvalue") + private val CONTAINER_IMAGE_PULL_POLICY = "IfNotPresent" + private val APP_NAME = "spark-test" + private val MAIN_CLASS = "org.apache.spark.examples.SparkPi" + private val APP_ARGS = Array("arg1", "arg2", "\"arg 3\"") + private val CUSTOM_ANNOTATION_KEY = "customAnnotation" + private val CUSTOM_ANNOTATION_VALUE = "customAnnotationValue" + private val DRIVER_ANNOTATIONS = Map(CUSTOM_ANNOTATION_KEY -> CUSTOM_ANNOTATION_VALUE) + private val DRIVER_CUSTOM_ENV1 = "customDriverEnv1" + private val DRIVER_CUSTOM_ENV2 = "customDriverEnv2" + private val DRIVER_ENVS = Map( + DRIVER_CUSTOM_ENV1 -> DRIVER_CUSTOM_ENV1, + DRIVER_CUSTOM_ENV2 -> DRIVER_CUSTOM_ENV2) + private val TEST_IMAGE_PULL_SECRETS = Seq("my-secret-1", "my-secret-2") + private val TEST_IMAGE_PULL_SECRET_OBJECTS = + TEST_IMAGE_PULL_SECRETS.map { secret => + new LocalObjectReferenceBuilder().withName(secret).build() + } + + test("Check the pod respects all configurations from the user.") { + val sparkConf = new SparkConf() + .set(KUBERNETES_DRIVER_POD_NAME, "spark-driver-pod") + .set("spark.driver.cores", "2") + .set(KUBERNETES_DRIVER_LIMIT_CORES, "4") + .set(org.apache.spark.internal.config.DRIVER_MEMORY.key, "256M") + .set(org.apache.spark.internal.config.DRIVER_MEMORY_OVERHEAD, 200L) + .set(CONTAINER_IMAGE, "spark-driver:latest") + .set(IMAGE_PULL_SECRETS, TEST_IMAGE_PULL_SECRETS.mkString(",")) + val kubernetesConf = KubernetesConf( + sparkConf, + KubernetesDriverSpecificConf( + None, + APP_NAME, + MAIN_CLASS, + APP_ARGS), + RESOURCE_NAME_PREFIX, + APP_ID, + DRIVER_LABELS, + DRIVER_ANNOTATIONS, + Map.empty, + DRIVER_ENVS) + + val featureStep = new BasicDriverFeatureStep(kubernetesConf) + val basePod = SparkPod.initialPod() + val configuredPod = featureStep.configurePod(basePod) + + assert(configuredPod.container.getName === DRIVER_CONTAINER_NAME) + assert(configuredPod.container.getImage === "spark-driver:latest") + assert(configuredPod.container.getImagePullPolicy === CONTAINER_IMAGE_PULL_POLICY) + + assert(configuredPod.container.getEnv.size === 3) + val envs = configuredPod.container + .getEnv + .asScala + .map(env => (env.getName, env.getValue)) + .toMap + assert(envs(DRIVER_CUSTOM_ENV1) === DRIVER_ENVS(DRIVER_CUSTOM_ENV1)) + assert(envs(DRIVER_CUSTOM_ENV2) === DRIVER_ENVS(DRIVER_CUSTOM_ENV2)) + + assert(configuredPod.pod.getSpec().getImagePullSecrets.asScala === + TEST_IMAGE_PULL_SECRET_OBJECTS) + + assert(configuredPod.container.getEnv.asScala.exists(envVar => + envVar.getName.equals(ENV_DRIVER_BIND_ADDRESS) && + envVar.getValueFrom.getFieldRef.getApiVersion.equals("v1") && + envVar.getValueFrom.getFieldRef.getFieldPath.equals("status.podIP"))) + + val resourceRequirements = configuredPod.container.getResources + val requests = resourceRequirements.getRequests.asScala + assert(requests("cpu").getAmount === "2") + assert(requests("memory").getAmount === "456Mi") + val limits = resourceRequirements.getLimits.asScala + 
assert(limits("memory").getAmount === "456Mi") + assert(limits("cpu").getAmount === "4") + + val driverPodMetadata = configuredPod.pod.getMetadata + assert(driverPodMetadata.getName === "spark-driver-pod") + assert(driverPodMetadata.getLabels.asScala === DRIVER_LABELS) + assert(driverPodMetadata.getAnnotations.asScala === DRIVER_ANNOTATIONS) + assert(configuredPod.pod.getSpec.getRestartPolicy === "Never") + + val expectedSparkConf = Map( + KUBERNETES_DRIVER_POD_NAME.key -> "spark-driver-pod", + "spark.app.id" -> APP_ID, + KUBERNETES_EXECUTOR_POD_NAME_PREFIX.key -> RESOURCE_NAME_PREFIX, + "spark.kubernetes.submitInDriver" -> "true") + assert(featureStep.getAdditionalPodSystemProperties() === expectedSparkConf) + } + + test("Additional system properties resolve jars and set cluster-mode confs.") { + val allJars = Seq("local:///opt/spark/jar1.jar", "hdfs:///opt/spark/jar2.jar") + val allFiles = Seq("https://localhost:9000/file1.txt", "local:///opt/spark/file2.txt") + val sparkConf = new SparkConf() + .set(KUBERNETES_DRIVER_POD_NAME, "spark-driver-pod") + .setJars(allJars) + .set("spark.files", allFiles.mkString(",")) + .set(CONTAINER_IMAGE, "spark-driver:latest") + val kubernetesConf = KubernetesConf( + sparkConf, + KubernetesDriverSpecificConf( + None, + APP_NAME, + MAIN_CLASS, + APP_ARGS), + RESOURCE_NAME_PREFIX, + APP_ID, + DRIVER_LABELS, + DRIVER_ANNOTATIONS, + Map.empty, + Map.empty) + val step = new BasicDriverFeatureStep(kubernetesConf) + val additionalProperties = step.getAdditionalPodSystemProperties() + val expectedSparkConf = Map( + KUBERNETES_DRIVER_POD_NAME.key -> "spark-driver-pod", + "spark.app.id" -> APP_ID, + KUBERNETES_EXECUTOR_POD_NAME_PREFIX.key -> RESOURCE_NAME_PREFIX, + "spark.kubernetes.submitInDriver" -> "true", + "spark.jars" -> "/opt/spark/jar1.jar,hdfs:///opt/spark/jar2.jar", + "spark.files" -> "https://localhost:9000/file1.txt,/opt/spark/file2.txt") + assert(additionalProperties === expectedSparkConf) + } +} diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/BasicExecutorFeatureStepSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/BasicExecutorFeatureStepSuite.scala new file mode 100644 index 0000000000000..a764f7630b5c8 --- /dev/null +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/BasicExecutorFeatureStepSuite.scala @@ -0,0 +1,179 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
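The executor suite below fixes the default sizing at 1408Mi. Numerically, that is 1g of executor memory plus the overhead floor, under the same assumed 0.1 factor and 384 MiB minimum as on the driver side:

```scala
// Arithmetic behind the "reasonable defaults" test below.
val executorMemoryMiB = 1024L
val overheadMiB = math.max((0.1 * executorMemoryMiB).toLong, 384L)  // floor wins: 384
val limit = s"${executorMemoryMiB + overheadMiB}Mi"                 // "1408Mi"
```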
+ */ +package org.apache.spark.deploy.k8s.features + +import scala.collection.JavaConverters._ + +import io.fabric8.kubernetes.api.model._ +import org.mockito.MockitoAnnotations +import org.scalatest.{BeforeAndAfter, BeforeAndAfterEach} + +import org.apache.spark.{SparkConf, SparkFunSuite} +import org.apache.spark.deploy.k8s.{KubernetesConf, KubernetesExecutorSpecificConf, SparkPod} +import org.apache.spark.deploy.k8s.Config._ +import org.apache.spark.deploy.k8s.Constants._ +import org.apache.spark.rpc.RpcEndpointAddress +import org.apache.spark.scheduler.cluster.CoarseGrainedSchedulerBackend + +class BasicExecutorFeatureStepSuite + extends SparkFunSuite with BeforeAndAfter with BeforeAndAfterEach { + + private val APP_ID = "app-id" + private val DRIVER_HOSTNAME = "localhost" + private val DRIVER_PORT = 7098 + private val DRIVER_ADDRESS = RpcEndpointAddress( + DRIVER_HOSTNAME, + DRIVER_PORT.toInt, + CoarseGrainedSchedulerBackend.ENDPOINT_NAME) + private val DRIVER_POD_NAME = "driver-pod" + + private val DRIVER_POD_UID = "driver-uid" + private val RESOURCE_NAME_PREFIX = "base" + private val EXECUTOR_IMAGE = "executor-image" + private val LABELS = Map("label1key" -> "label1value") + private val ANNOTATIONS = Map("annotation1key" -> "annotation1value") + private val TEST_IMAGE_PULL_SECRETS = Seq("my-1secret-1", "my-secret-2") + private val TEST_IMAGE_PULL_SECRET_OBJECTS = + TEST_IMAGE_PULL_SECRETS.map { secret => + new LocalObjectReferenceBuilder().withName(secret).build() + } + private val DRIVER_POD = new PodBuilder() + .withNewMetadata() + .withName(DRIVER_POD_NAME) + .withUid(DRIVER_POD_UID) + .endMetadata() + .withNewSpec() + .withNodeName("some-node") + .endSpec() + .withNewStatus() + .withHostIP("192.168.99.100") + .endStatus() + .build() + private var baseConf: SparkConf = _ + + before { + MockitoAnnotations.initMocks(this) + baseConf = new SparkConf() + .set(KUBERNETES_DRIVER_POD_NAME, DRIVER_POD_NAME) + .set(KUBERNETES_EXECUTOR_POD_NAME_PREFIX, RESOURCE_NAME_PREFIX) + .set(CONTAINER_IMAGE, EXECUTOR_IMAGE) + .set(KUBERNETES_DRIVER_SUBMIT_CHECK, true) + .set("spark.driver.host", DRIVER_HOSTNAME) + .set("spark.driver.port", DRIVER_PORT.toString) + .set(IMAGE_PULL_SECRETS, TEST_IMAGE_PULL_SECRETS.mkString(",")) + } + + test("basic executor pod has reasonable defaults") { + val step = new BasicExecutorFeatureStep( + KubernetesConf( + baseConf, + KubernetesExecutorSpecificConf("1", DRIVER_POD), + RESOURCE_NAME_PREFIX, + APP_ID, + LABELS, + ANNOTATIONS, + Map.empty, + Map.empty)) + val executor = step.configurePod(SparkPod.initialPod()) + + // The executor pod name and default labels. + assert(executor.pod.getMetadata.getName === s"$RESOURCE_NAME_PREFIX-exec-1") + assert(executor.pod.getMetadata.getLabels.asScala === LABELS) + assert(executor.pod.getSpec.getImagePullSecrets.asScala === TEST_IMAGE_PULL_SECRET_OBJECTS) + + // There is exactly 1 container with no volume mounts and default memory limits. + // Default memory limit is 1024M + 384M (minimum overhead constant). + assert(executor.container.getImage === EXECUTOR_IMAGE) + assert(executor.container.getVolumeMounts.isEmpty) + assert(executor.container.getResources.getLimits.size() === 1) + assert(executor.container.getResources + .getLimits.get("memory").getAmount === "1408Mi") + + // The pod has no node selector, volumes. 
+ assert(executor.pod.getSpec.getNodeSelector.isEmpty) + assert(executor.pod.getSpec.getVolumes.isEmpty) + + checkEnv(executor, Map()) + checkOwnerReferences(executor.pod, DRIVER_POD_UID) + } + + test("executor pod hostnames get truncated to 63 characters") { + val conf = baseConf.clone() + val longPodNamePrefix = "loremipsumdolorsitametvimatelitrefficiendisuscipianturvixlegeresple" + + val step = new BasicExecutorFeatureStep( + KubernetesConf( + conf, + KubernetesExecutorSpecificConf("1", DRIVER_POD), + longPodNamePrefix, + APP_ID, + LABELS, + ANNOTATIONS, + Map.empty, + Map.empty)) + assert(step.configurePod(SparkPod.initialPod()).pod.getSpec.getHostname.length === 63) + } + + test("classpath and extra java options get translated into environment variables") { + val conf = baseConf.clone() + conf.set(org.apache.spark.internal.config.EXECUTOR_JAVA_OPTIONS, "foo=bar") + conf.set(org.apache.spark.internal.config.EXECUTOR_CLASS_PATH, "bar=baz") + + val step = new BasicExecutorFeatureStep( + KubernetesConf( + conf, + KubernetesExecutorSpecificConf("1", DRIVER_POD), + RESOURCE_NAME_PREFIX, + APP_ID, + LABELS, + ANNOTATIONS, + Map.empty, + Map("qux" -> "quux"))) + val executor = step.configurePod(SparkPod.initialPod()) + + checkEnv(executor, + Map("SPARK_JAVA_OPT_0" -> "foo=bar", + ENV_CLASSPATH -> "bar=baz", + "qux" -> "quux")) + checkOwnerReferences(executor.pod, DRIVER_POD_UID) + } + + // There is always exactly one controller reference, and it points to the driver pod. + private def checkOwnerReferences(executor: Pod, driverPodUid: String): Unit = { + assert(executor.getMetadata.getOwnerReferences.size() === 1) + assert(executor.getMetadata.getOwnerReferences.get(0).getUid === driverPodUid) + assert(executor.getMetadata.getOwnerReferences.get(0).getController === true) + } + + // Check that the expected environment variables are present. + private def checkEnv(executorPod: SparkPod, additionalEnvVars: Map[String, String]): Unit = { + val defaultEnvs = Map( + ENV_EXECUTOR_ID -> "1", + ENV_DRIVER_URL -> DRIVER_ADDRESS.toString, + ENV_EXECUTOR_CORES -> "1", + ENV_EXECUTOR_MEMORY -> "1g", + ENV_APPLICATION_ID -> APP_ID, + ENV_SPARK_CONF_DIR -> SPARK_CONF_DIR_INTERNAL, + ENV_EXECUTOR_POD_IP -> null) ++ additionalEnvVars + + assert(executorPod.container.getEnv.size() === defaultEnvs.size) + val mapEnvs = executorPod.container.getEnv.asScala.map { + x => (x.getName, x.getValue) + }.toMap + assert(defaultEnvs === mapEnvs) + } +} diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/submit/steps/DriverKubernetesCredentialsStepSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/DriverKubernetesCredentialsFeatureStepSuite.scala similarity index 67% rename from resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/submit/steps/DriverKubernetesCredentialsStepSuite.scala rename to resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/DriverKubernetesCredentialsFeatureStepSuite.scala index 64553d25883bb..9f817d3bfc79a 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/submit/steps/DriverKubernetesCredentialsStepSuite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/DriverKubernetesCredentialsFeatureStepSuite.scala @@ -14,34 +14,35 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package org.apache.spark.deploy.k8s.submit.steps +package org.apache.spark.deploy.k8s.features import java.io.File -import scala.collection.JavaConverters._ - import com.google.common.base.Charsets import com.google.common.io.{BaseEncoding, Files} import io.fabric8.kubernetes.api.model.{ContainerBuilder, HasMetadata, PodBuilder, Secret} +import org.mockito.{Mock, MockitoAnnotations} import org.scalatest.BeforeAndAfter +import scala.collection.JavaConverters._ import org.apache.spark.{SparkConf, SparkFunSuite} +import org.apache.spark.deploy.k8s.{KubernetesConf, KubernetesDriverSpecificConf, SparkPod} import org.apache.spark.deploy.k8s.Config._ import org.apache.spark.deploy.k8s.Constants._ -import org.apache.spark.deploy.k8s.submit.KubernetesDriverSpec import org.apache.spark.util.Utils -class DriverKubernetesCredentialsStepSuite extends SparkFunSuite with BeforeAndAfter { +class DriverKubernetesCredentialsFeatureStepSuite extends SparkFunSuite with BeforeAndAfter { private val KUBERNETES_RESOURCE_NAME_PREFIX = "spark" + private val APP_ID = "k8s-app" private var credentialsTempDirectory: File = _ - private val BASE_DRIVER_SPEC = new KubernetesDriverSpec( - driverPod = new PodBuilder().build(), - driverContainer = new ContainerBuilder().build(), - driverSparkConf = new SparkConf(false), - otherKubernetesResources = Seq.empty[HasMetadata]) + private val BASE_DRIVER_POD = SparkPod.initialPod() + + @Mock + private var driverSpecificConf: KubernetesDriverSpecificConf = _ before { + MockitoAnnotations.initMocks(this) credentialsTempDirectory = Utils.createTempDir() } @@ -50,13 +51,19 @@ class DriverKubernetesCredentialsStepSuite extends SparkFunSuite with BeforeAndA } test("Don't set any credentials") { - val kubernetesCredentialsStep = new DriverKubernetesCredentialsStep( - new SparkConf(false), KUBERNETES_RESOURCE_NAME_PREFIX) - val preparedDriverSpec = kubernetesCredentialsStep.configureDriver(BASE_DRIVER_SPEC) - assert(preparedDriverSpec.driverPod === BASE_DRIVER_SPEC.driverPod) - assert(preparedDriverSpec.driverContainer === BASE_DRIVER_SPEC.driverContainer) - assert(preparedDriverSpec.otherKubernetesResources.isEmpty) - assert(preparedDriverSpec.driverSparkConf.getAll.isEmpty) + val kubernetesConf = KubernetesConf( + new SparkConf(false), + driverSpecificConf, + KUBERNETES_RESOURCE_NAME_PREFIX, + APP_ID, + Map.empty, + Map.empty, + Map.empty, + Map.empty) + val kubernetesCredentialsStep = new DriverKubernetesCredentialsFeatureStep(kubernetesConf) + assert(kubernetesCredentialsStep.configurePod(BASE_DRIVER_POD) === BASE_DRIVER_POD) + assert(kubernetesCredentialsStep.getAdditionalPodSystemProperties().isEmpty) + assert(kubernetesCredentialsStep.getAdditionalKubernetesResources().isEmpty) } test("Only set credentials that are manually mounted.") { @@ -73,14 +80,23 @@ class DriverKubernetesCredentialsStepSuite extends SparkFunSuite with BeforeAndA .set( s"$KUBERNETES_AUTH_DRIVER_MOUNTED_CONF_PREFIX.$CA_CERT_FILE_CONF_SUFFIX", "/mnt/secrets/my-ca.pem") + val kubernetesConf = KubernetesConf( + submissionSparkConf, + driverSpecificConf, + KUBERNETES_RESOURCE_NAME_PREFIX, + APP_ID, + Map.empty, + Map.empty, + Map.empty, + Map.empty) - val kubernetesCredentialsStep = new DriverKubernetesCredentialsStep( - submissionSparkConf, KUBERNETES_RESOURCE_NAME_PREFIX) - val preparedDriverSpec = kubernetesCredentialsStep.configureDriver(BASE_DRIVER_SPEC) - assert(preparedDriverSpec.driverPod === BASE_DRIVER_SPEC.driverPod) - assert(preparedDriverSpec.driverContainer === BASE_DRIVER_SPEC.driverContainer) 
- assert(preparedDriverSpec.otherKubernetesResources.isEmpty) - assert(preparedDriverSpec.driverSparkConf.getAll.toMap === submissionSparkConf.getAll.toMap) + val kubernetesCredentialsStep = new DriverKubernetesCredentialsFeatureStep(kubernetesConf) + assert(kubernetesCredentialsStep.configurePod(BASE_DRIVER_POD) === BASE_DRIVER_POD) + assert(kubernetesCredentialsStep.getAdditionalKubernetesResources().isEmpty) + val resolvedProperties = kubernetesCredentialsStep.getAdditionalPodSystemProperties() + resolvedProperties.foreach { case (propKey, propValue) => + assert(submissionSparkConf.get(propKey) === propValue) + } } test("Mount credentials from the submission client as a secret.") { @@ -100,10 +116,17 @@ class DriverKubernetesCredentialsStepSuite extends SparkFunSuite with BeforeAndA .set( s"$KUBERNETES_AUTH_DRIVER_CONF_PREFIX.$CA_CERT_FILE_CONF_SUFFIX", caCertFile.getAbsolutePath) - val kubernetesCredentialsStep = new DriverKubernetesCredentialsStep( - submissionSparkConf, KUBERNETES_RESOURCE_NAME_PREFIX) - val preparedDriverSpec = kubernetesCredentialsStep.configureDriver( - BASE_DRIVER_SPEC.copy(driverSparkConf = submissionSparkConf)) + val kubernetesConf = KubernetesConf( + submissionSparkConf, + driverSpecificConf, + KUBERNETES_RESOURCE_NAME_PREFIX, + APP_ID, + Map.empty, + Map.empty, + Map.empty, + Map.empty) + val kubernetesCredentialsStep = new DriverKubernetesCredentialsFeatureStep(kubernetesConf) + val resolvedProperties = kubernetesCredentialsStep.getAdditionalPodSystemProperties() val expectedSparkConf = Map( s"$KUBERNETES_AUTH_DRIVER_CONF_PREFIX.$OAUTH_TOKEN_CONF_SUFFIX" -> "", s"$KUBERNETES_AUTH_DRIVER_MOUNTED_CONF_PREFIX.$OAUTH_TOKEN_FILE_CONF_SUFFIX" -> @@ -113,16 +136,13 @@ class DriverKubernetesCredentialsStepSuite extends SparkFunSuite with BeforeAndA s"$KUBERNETES_AUTH_DRIVER_MOUNTED_CONF_PREFIX.$CLIENT_CERT_FILE_CONF_SUFFIX" -> DRIVER_CREDENTIALS_CLIENT_CERT_PATH, s"$KUBERNETES_AUTH_DRIVER_MOUNTED_CONF_PREFIX.$CA_CERT_FILE_CONF_SUFFIX" -> - DRIVER_CREDENTIALS_CA_CERT_PATH, - s"$KUBERNETES_AUTH_DRIVER_CONF_PREFIX.$CLIENT_KEY_FILE_CONF_SUFFIX" -> - clientKeyFile.getAbsolutePath, - s"$KUBERNETES_AUTH_DRIVER_CONF_PREFIX.$CLIENT_CERT_FILE_CONF_SUFFIX" -> - clientCertFile.getAbsolutePath, - s"$KUBERNETES_AUTH_DRIVER_CONF_PREFIX.$CA_CERT_FILE_CONF_SUFFIX" -> - caCertFile.getAbsolutePath) - assert(preparedDriverSpec.driverSparkConf.getAll.toMap === expectedSparkConf) - assert(preparedDriverSpec.otherKubernetesResources.size === 1) - val credentialsSecret = preparedDriverSpec.otherKubernetesResources.head.asInstanceOf[Secret] + DRIVER_CREDENTIALS_CA_CERT_PATH) + assert(resolvedProperties === expectedSparkConf) + assert(kubernetesCredentialsStep.getAdditionalKubernetesResources().size === 1) + val credentialsSecret = kubernetesCredentialsStep + .getAdditionalKubernetesResources() + .head + .asInstanceOf[Secret] assert(credentialsSecret.getMetadata.getName === s"$KUBERNETES_RESOURCE_NAME_PREFIX-kubernetes-credentials") val decodedSecretData = credentialsSecret.getData.asScala.map { data => @@ -134,12 +154,13 @@ class DriverKubernetesCredentialsStepSuite extends SparkFunSuite with BeforeAndA DRIVER_CREDENTIALS_CLIENT_KEY_SECRET_NAME -> "key", DRIVER_CREDENTIALS_CLIENT_CERT_SECRET_NAME -> "cert") assert(decodedSecretData === expectedSecretData) - val driverPodVolumes = preparedDriverSpec.driverPod.getSpec.getVolumes.asScala + val driverPod = kubernetesCredentialsStep.configurePod(BASE_DRIVER_POD) + val driverPodVolumes = driverPod.pod.getSpec.getVolumes.asScala 
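+    // The credentials should surface as a single secret-backed volume on the driver
+    // pod, backed by the secret created above and mounted into the driver container.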
assert(driverPodVolumes.size === 1) assert(driverPodVolumes.head.getName === DRIVER_CREDENTIALS_SECRET_VOLUME_NAME) assert(driverPodVolumes.head.getSecret != null) assert(driverPodVolumes.head.getSecret.getSecretName === credentialsSecret.getMetadata.getName) - val driverContainerVolumeMount = preparedDriverSpec.driverContainer.getVolumeMounts.asScala + val driverContainerVolumeMount = driverPod.container.getVolumeMounts.asScala assert(driverContainerVolumeMount.size === 1) assert(driverContainerVolumeMount.head.getName === DRIVER_CREDENTIALS_SECRET_VOLUME_NAME) assert(driverContainerVolumeMount.head.getMountPath === DRIVER_CREDENTIALS_SECRETS_BASE_DIR) diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/DriverServiceFeatureStepSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/DriverServiceFeatureStepSuite.scala new file mode 100644 index 0000000000000..c299d56865ec0 --- /dev/null +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/DriverServiceFeatureStepSuite.scala @@ -0,0 +1,227 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.spark.deploy.k8s.features + +import io.fabric8.kubernetes.api.model.Service +import org.mockito.{Mock, MockitoAnnotations} +import org.mockito.Mockito.when +import org.scalatest.BeforeAndAfter +import scala.collection.JavaConverters._ + +import org.apache.spark.{SparkConf, SparkFunSuite} +import org.apache.spark.deploy.k8s.{KubernetesConf, KubernetesDriverSpecificConf, SparkPod} +import org.apache.spark.deploy.k8s.Config._ +import org.apache.spark.deploy.k8s.Constants._ +import org.apache.spark.util.Clock + +class DriverServiceFeatureStepSuite extends SparkFunSuite with BeforeAndAfter { + + private val SHORT_RESOURCE_NAME_PREFIX = + "a" * (DriverServiceFeatureStep.MAX_SERVICE_NAME_LENGTH - + DriverServiceFeatureStep.DRIVER_SVC_POSTFIX.length) + + private val LONG_RESOURCE_NAME_PREFIX = + "a" * (DriverServiceFeatureStep.MAX_SERVICE_NAME_LENGTH - + DriverServiceFeatureStep.DRIVER_SVC_POSTFIX.length + 1) + private val DRIVER_LABELS = Map( + "label1key" -> "label1value", + "label2key" -> "label2value") + + @Mock + private var clock: Clock = _ + + private var sparkConf: SparkConf = _ + + before { + MockitoAnnotations.initMocks(this) + sparkConf = new SparkConf(false) + } + + test("Headless service has a port for the driver RPC and the block manager.") { + sparkConf = sparkConf + .set("spark.driver.port", "9000") + .set(org.apache.spark.internal.config.DRIVER_BLOCK_MANAGER_PORT, 8080) + val configurationStep = new DriverServiceFeatureStep( + KubernetesConf( + sparkConf, + KubernetesDriverSpecificConf( + None, "main", "app", Seq.empty), + SHORT_RESOURCE_NAME_PREFIX, + "app-id", + DRIVER_LABELS, + Map.empty, + Map.empty, + Map.empty)) + assert(configurationStep.configurePod(SparkPod.initialPod()) === SparkPod.initialPod()) + assert(configurationStep.getAdditionalKubernetesResources().size === 1) + assert(configurationStep.getAdditionalKubernetesResources().head.isInstanceOf[Service]) + val driverService = configurationStep + .getAdditionalKubernetesResources() + .head + .asInstanceOf[Service] + verifyService( + 9000, + 8080, + s"$SHORT_RESOURCE_NAME_PREFIX${DriverServiceFeatureStep.DRIVER_SVC_POSTFIX}", + driverService) + } + + test("Hostname and ports are set according to the service name.") { + val configurationStep = new DriverServiceFeatureStep( + KubernetesConf( + sparkConf + .set("spark.driver.port", "9000") + .set(org.apache.spark.internal.config.DRIVER_BLOCK_MANAGER_PORT, 8080) + .set(KUBERNETES_NAMESPACE, "my-namespace"), + KubernetesDriverSpecificConf( + None, "main", "app", Seq.empty), + SHORT_RESOURCE_NAME_PREFIX, + "app-id", + DRIVER_LABELS, + Map.empty, + Map.empty, + Map.empty)) + val expectedServiceName = SHORT_RESOURCE_NAME_PREFIX + + DriverServiceFeatureStep.DRIVER_SVC_POSTFIX + val expectedHostName = s"$expectedServiceName.my-namespace.svc" + val additionalProps = configurationStep.getAdditionalPodSystemProperties() + verifySparkConfHostNames(additionalProps, expectedHostName) + } + + test("Ports should resolve to defaults in SparkConf and in the service.") { + val configurationStep = new DriverServiceFeatureStep( + KubernetesConf( + sparkConf, + KubernetesDriverSpecificConf( + None, "main", "app", Seq.empty), + SHORT_RESOURCE_NAME_PREFIX, + "app-id", + DRIVER_LABELS, + Map.empty, + Map.empty, + Map.empty)) + val resolvedService = configurationStep + .getAdditionalKubernetesResources() + .head + .asInstanceOf[Service] + verifyService( + DEFAULT_DRIVER_PORT, + DEFAULT_BLOCKMANAGER_PORT, + 
s"$SHORT_RESOURCE_NAME_PREFIX${DriverServiceFeatureStep.DRIVER_SVC_POSTFIX}", + resolvedService) + val additionalProps = configurationStep.getAdditionalPodSystemProperties() + assert(additionalProps("spark.driver.port") === DEFAULT_DRIVER_PORT.toString) + assert(additionalProps(org.apache.spark.internal.config.DRIVER_BLOCK_MANAGER_PORT.key) + === DEFAULT_BLOCKMANAGER_PORT.toString) + } + + test("Long prefixes should switch to using a generated name.") { + when(clock.getTimeMillis()).thenReturn(10000) + val configurationStep = new DriverServiceFeatureStep( + KubernetesConf( + sparkConf.set(KUBERNETES_NAMESPACE, "my-namespace"), + KubernetesDriverSpecificConf( + None, "main", "app", Seq.empty), + LONG_RESOURCE_NAME_PREFIX, + "app-id", + DRIVER_LABELS, + Map.empty, + Map.empty, + Map.empty), + clock) + val driverService = configurationStep + .getAdditionalKubernetesResources() + .head + .asInstanceOf[Service] + val expectedServiceName = s"spark-10000${DriverServiceFeatureStep.DRIVER_SVC_POSTFIX}" + assert(driverService.getMetadata.getName === expectedServiceName) + val expectedHostName = s"$expectedServiceName.my-namespace.svc" + val additionalProps = configurationStep.getAdditionalPodSystemProperties() + verifySparkConfHostNames(additionalProps, expectedHostName) + } + + test("Disallow bind address and driver host to be set explicitly.") { + try { + new DriverServiceFeatureStep( + KubernetesConf( + sparkConf.set(org.apache.spark.internal.config.DRIVER_BIND_ADDRESS, "host"), + KubernetesDriverSpecificConf( + None, "main", "app", Seq.empty), + LONG_RESOURCE_NAME_PREFIX, + "app-id", + DRIVER_LABELS, + Map.empty, + Map.empty, + Map.empty), + clock) + fail("The driver bind address should not be allowed.") + } catch { + case e: Throwable => + assert(e.getMessage === + s"requirement failed: ${DriverServiceFeatureStep.DRIVER_BIND_ADDRESS_KEY} is" + + " not supported in Kubernetes mode, as the driver's bind address is managed" + + " and set to the driver pod's IP address.") + } + sparkConf.remove(org.apache.spark.internal.config.DRIVER_BIND_ADDRESS) + sparkConf.set(org.apache.spark.internal.config.DRIVER_HOST_ADDRESS, "host") + try { + new DriverServiceFeatureStep( + KubernetesConf( + sparkConf, + KubernetesDriverSpecificConf( + None, "main", "app", Seq.empty), + LONG_RESOURCE_NAME_PREFIX, + "app-id", + DRIVER_LABELS, + Map.empty, + Map.empty, + Map.empty), + clock) + fail("The driver host address should not be allowed.") + } catch { + case e: Throwable => + assert(e.getMessage === + s"requirement failed: ${DriverServiceFeatureStep.DRIVER_HOST_KEY} is" + + " not supported in Kubernetes mode, as the driver's hostname will be managed via" + + " a Kubernetes service.") + } + } + + private def verifyService( + driverPort: Int, + blockManagerPort: Int, + expectedServiceName: String, + service: Service): Unit = { + assert(service.getMetadata.getName === expectedServiceName) + assert(service.getSpec.getClusterIP === "None") + assert(service.getSpec.getSelector.asScala === DRIVER_LABELS) + assert(service.getSpec.getPorts.size() === 2) + val driverServicePorts = service.getSpec.getPorts.asScala + assert(driverServicePorts.head.getName === DRIVER_PORT_NAME) + assert(driverServicePorts.head.getPort.intValue() === driverPort) + assert(driverServicePorts.head.getTargetPort.getIntVal === driverPort) + assert(driverServicePorts(1).getName === BLOCK_MANAGER_PORT_NAME) + assert(driverServicePorts(1).getPort.intValue() === blockManagerPort) + assert(driverServicePorts(1).getTargetPort.getIntVal === blockManagerPort) + 
} + + private def verifySparkConfHostNames( + driverSparkConf: Map[String, String], expectedHostName: String): Unit = { + assert(driverSparkConf( + org.apache.spark.internal.config.DRIVER_HOST_ADDRESS.key) === expectedHostName) + } +} diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/KubernetesFeaturesTestUtils.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/KubernetesFeaturesTestUtils.scala new file mode 100644 index 0000000000000..27bff74ce38af --- /dev/null +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/KubernetesFeaturesTestUtils.scala @@ -0,0 +1,61 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.deploy.k8s.features + +import io.fabric8.kubernetes.api.model.{HasMetadata, PodBuilder, SecretBuilder} +import org.mockito.Matchers +import org.mockito.Mockito._ +import org.mockito.invocation.InvocationOnMock +import org.mockito.stubbing.Answer + +import org.apache.spark.deploy.k8s.SparkPod + +object KubernetesFeaturesTestUtils { + + def getMockConfigStepForStepType[T <: KubernetesFeatureConfigStep]( + stepType: String, stepClass: Class[T]): T = { + val mockStep = mock(stepClass) + when(mockStep.getAdditionalKubernetesResources()).thenReturn( + getSecretsForStepType(stepType)) + + when(mockStep.getAdditionalPodSystemProperties()) + .thenReturn(Map(stepType -> stepType)) + when(mockStep.configurePod(Matchers.any(classOf[SparkPod]))) + .thenAnswer(new Answer[SparkPod]() { + override def answer(invocation: InvocationOnMock): SparkPod = { + val originalPod = invocation.getArgumentAt(0, classOf[SparkPod]) + val configuredPod = new PodBuilder(originalPod.pod) + .editOrNewMetadata() + .addToLabels(stepType, stepType) + .endMetadata() + .build() + SparkPod(configuredPod, originalPod.container) + } + }) + mockStep + } + + def getSecretsForStepType[T <: KubernetesFeatureConfigStep](stepType: String) + : Seq[HasMetadata] = { + Seq(new SecretBuilder() + .withNewMetadata() + .withName(stepType) + .endMetadata() + .build()) + } + +} diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/submit/steps/DriverMountSecretsStepSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/MountSecretsFeatureStepSuite.scala similarity index 64% rename from resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/submit/steps/DriverMountSecretsStepSuite.scala rename to resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/MountSecretsFeatureStepSuite.scala index 960d0bda1d011..9d02f56cc206d 100644 --- 
a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/submit/steps/DriverMountSecretsStepSuite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/MountSecretsFeatureStepSuite.scala @@ -14,29 +14,38 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.apache.spark.deploy.k8s.submit.steps +package org.apache.spark.deploy.k8s.features + +import io.fabric8.kubernetes.api.model.PodBuilder import org.apache.spark.{SparkConf, SparkFunSuite} -import org.apache.spark.deploy.k8s.{MountSecretsBootstrap, SecretVolumeUtils} -import org.apache.spark.deploy.k8s.submit.KubernetesDriverSpec +import org.apache.spark.deploy.k8s.{KubernetesConf, KubernetesExecutorSpecificConf, SecretVolumeUtils, SparkPod} -class DriverMountSecretsStepSuite extends SparkFunSuite { +class MountSecretsFeatureStepSuite extends SparkFunSuite { private val SECRET_FOO = "foo" private val SECRET_BAR = "bar" private val SECRET_MOUNT_PATH = "/etc/secrets/driver" test("mounts all given secrets") { - val baseDriverSpec = KubernetesDriverSpec.initialSpec(new SparkConf(false)) + val baseDriverPod = SparkPod.initialPod() val secretNamesToMountPaths = Map( SECRET_FOO -> SECRET_MOUNT_PATH, SECRET_BAR -> SECRET_MOUNT_PATH) + val sparkConf = new SparkConf(false) + val kubernetesConf = KubernetesConf( + sparkConf, + KubernetesExecutorSpecificConf("1", new PodBuilder().build()), + "resource-name-prefix", + "app-id", + Map.empty, + Map.empty, + secretNamesToMountPaths, + Map.empty) - val mountSecretsBootstrap = new MountSecretsBootstrap(secretNamesToMountPaths) - val mountSecretsStep = new DriverMountSecretsStep(mountSecretsBootstrap) - val configuredDriverSpec = mountSecretsStep.configureDriver(baseDriverSpec) - val driverPodWithSecretsMounted = configuredDriverSpec.driverPod - val driverContainerWithSecretsMounted = configuredDriverSpec.driverContainer + val step = new MountSecretsFeatureStep(kubernetesConf) + val driverPodWithSecretsMounted = step.configurePod(baseDriverPod).pod + val driverContainerWithSecretsMounted = step.configurePod(baseDriverPod).container Seq(s"$SECRET_FOO-volume", s"$SECRET_BAR-volume").foreach { volumeName => assert(SecretVolumeUtils.podHasVolume(driverPodWithSecretsMounted, volumeName)) diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/submit/ClientSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/submit/ClientSuite.scala index 6a501592f42a3..c1b203e03a357 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/submit/ClientSuite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/submit/ClientSuite.scala @@ -16,22 +16,17 @@ */ package org.apache.spark.deploy.k8s.submit -import scala.collection.JavaConverters._ - -import com.google.common.collect.Iterables import io.fabric8.kubernetes.api.model._ import io.fabric8.kubernetes.client.{KubernetesClient, Watch} import io.fabric8.kubernetes.client.dsl.{MixedOperation, NamespaceListVisitFromServerGetDeleteRecreateWaitApplicable, PodResource} import org.mockito.{ArgumentCaptor, Mock, MockitoAnnotations} import org.mockito.Mockito.{doReturn, verify, when} -import org.mockito.invocation.InvocationOnMock -import org.mockito.stubbing.Answer import org.scalatest.BeforeAndAfter import org.scalatest.mockito.MockitoSugar._ import org.apache.spark.{SparkConf, SparkFunSuite} +import 
org.apache.spark.deploy.k8s.{KubernetesConf, KubernetesDriverSpec, KubernetesDriverSpecificConf, SparkPod} import org.apache.spark.deploy.k8s.Constants._ -import org.apache.spark.deploy.k8s.submit.steps.DriverConfigurationStep class ClientSuite extends SparkFunSuite with BeforeAndAfter { @@ -39,6 +34,74 @@ class ClientSuite extends SparkFunSuite with BeforeAndAfter { private val DRIVER_POD_API_VERSION = "v1" private val DRIVER_POD_KIND = "pod" private val KUBERNETES_RESOURCE_PREFIX = "resource-example" + private val POD_NAME = "driver" + private val CONTAINER_NAME = "container" + private val APP_ID = "app-id" + private val APP_NAME = "app" + private val MAIN_CLASS = "main" + private val APP_ARGS = Seq("arg1", "arg2") + private val RESOLVED_JAVA_OPTIONS = Map( + "conf1key" -> "conf1value", + "conf2key" -> "conf2value") + private val BUILT_DRIVER_POD = + new PodBuilder() + .withNewMetadata() + .withName(POD_NAME) + .endMetadata() + .withNewSpec() + .withHostname("localhost") + .endSpec() + .build() + private val BUILT_DRIVER_CONTAINER = new ContainerBuilder().withName(CONTAINER_NAME).build() + private val ADDITIONAL_RESOURCES = Seq( + new SecretBuilder().withNewMetadata().withName("secret").endMetadata().build()) + + private val BUILT_KUBERNETES_SPEC = KubernetesDriverSpec( + SparkPod(BUILT_DRIVER_POD, BUILT_DRIVER_CONTAINER), + ADDITIONAL_RESOURCES, + RESOLVED_JAVA_OPTIONS) + + private val FULL_EXPECTED_CONTAINER = new ContainerBuilder(BUILT_DRIVER_CONTAINER) + .addNewEnv() + .withName(ENV_SPARK_CONF_DIR) + .withValue(SPARK_CONF_DIR_INTERNAL) + .endEnv() + .addNewVolumeMount() + .withName(SPARK_CONF_VOLUME) + .withMountPath(SPARK_CONF_DIR_INTERNAL) + .endVolumeMount() + .build() + private val FULL_EXPECTED_POD = new PodBuilder(BUILT_DRIVER_POD) + .editSpec() + .addToContainers(FULL_EXPECTED_CONTAINER) + .addNewVolume() + .withName(SPARK_CONF_VOLUME) + .withNewConfigMap().withName(s"$KUBERNETES_RESOURCE_PREFIX-driver-conf-map").endConfigMap() + .endVolume() + .endSpec() + .build() + + private val POD_WITH_OWNER_REFERENCE = new PodBuilder(FULL_EXPECTED_POD) + .editMetadata() + .withUid(DRIVER_POD_UID) + .endMetadata() + .withApiVersion(DRIVER_POD_API_VERSION) + .withKind(DRIVER_POD_KIND) + .build() + + private val ADDITIONAL_RESOURCES_WITH_OWNER_REFERENCES = ADDITIONAL_RESOURCES.map { secret => + new SecretBuilder(secret) + .editMetadata() + .addNewOwnerReference() + .withName(POD_NAME) + .withApiVersion(DRIVER_POD_API_VERSION) + .withKind(DRIVER_POD_KIND) + .withController(true) + .withUid(DRIVER_POD_UID) + .endOwnerReference() + .endMetadata() + .build() + } private type ResourceList = NamespaceListVisitFromServerGetDeleteRecreateWaitApplicable[ HasMetadata, Boolean] @@ -56,113 +119,86 @@ class ClientSuite extends SparkFunSuite with BeforeAndAfter { @Mock private var loggingPodStatusWatcher: LoggingPodStatusWatcher = _ + @Mock + private var driverBuilder: KubernetesDriverBuilder = _ + @Mock private var resourceList: ResourceList = _ - private val submissionSteps = Seq(FirstTestConfigurationStep, SecondTestConfigurationStep) + private var kubernetesConf: KubernetesConf[KubernetesDriverSpecificConf] = _ + + private var sparkConf: SparkConf = _ private var createdPodArgumentCaptor: ArgumentCaptor[Pod] = _ private var createdResourcesArgumentCaptor: ArgumentCaptor[HasMetadata] = _ - private var createdContainerArgumentCaptor: ArgumentCaptor[Container] = _ before { MockitoAnnotations.initMocks(this) + sparkConf = new SparkConf(false) + kubernetesConf = 
KubernetesConf[KubernetesDriverSpecificConf]( + sparkConf, + KubernetesDriverSpecificConf(None, MAIN_CLASS, APP_NAME, APP_ARGS), + KUBERNETES_RESOURCE_PREFIX, + APP_ID, + Map.empty, + Map.empty, + Map.empty, + Map.empty) + when(driverBuilder.buildFromFeatures(kubernetesConf)).thenReturn(BUILT_KUBERNETES_SPEC) when(kubernetesClient.pods()).thenReturn(podOperations) - when(podOperations.withName(FirstTestConfigurationStep.podName)).thenReturn(namedPods) + when(podOperations.withName(POD_NAME)).thenReturn(namedPods) createdPodArgumentCaptor = ArgumentCaptor.forClass(classOf[Pod]) createdResourcesArgumentCaptor = ArgumentCaptor.forClass(classOf[HasMetadata]) - when(podOperations.create(createdPodArgumentCaptor.capture())).thenAnswer(new Answer[Pod] { - override def answer(invocation: InvocationOnMock): Pod = { - new PodBuilder(invocation.getArgumentAt(0, classOf[Pod])) - .editMetadata() - .withUid(DRIVER_POD_UID) - .endMetadata() - .withApiVersion(DRIVER_POD_API_VERSION) - .withKind(DRIVER_POD_KIND) - .build() - } - }) - when(podOperations.withName(FirstTestConfigurationStep.podName)).thenReturn(namedPods) + when(podOperations.create(FULL_EXPECTED_POD)).thenReturn(POD_WITH_OWNER_REFERENCE) when(namedPods.watch(loggingPodStatusWatcher)).thenReturn(mock[Watch]) doReturn(resourceList) .when(kubernetesClient) .resourceList(createdResourcesArgumentCaptor.capture()) } - test("The client should configure the pod with the submission steps.") { + test("The client should configure the pod using the builder.") { val submissionClient = new Client( - submissionSteps, - new SparkConf(false), + driverBuilder, + kubernetesConf, kubernetesClient, false, "spark", loggingPodStatusWatcher, KUBERNETES_RESOURCE_PREFIX) submissionClient.run() - val createdPod = createdPodArgumentCaptor.getValue - assert(createdPod.getMetadata.getName === FirstTestConfigurationStep.podName) - assert(createdPod.getMetadata.getLabels.asScala === - Map(FirstTestConfigurationStep.labelKey -> FirstTestConfigurationStep.labelValue)) - assert(createdPod.getMetadata.getAnnotations.asScala === - Map(SecondTestConfigurationStep.annotationKey -> - SecondTestConfigurationStep.annotationValue)) - assert(createdPod.getSpec.getContainers.size() === 1) - assert(createdPod.getSpec.getContainers.get(0).getName === - SecondTestConfigurationStep.containerName) + verify(podOperations).create(FULL_EXPECTED_POD) } test("The client should create Kubernetes resources") { - val EXAMPLE_JAVA_OPTS = "-XX:+HeapDumpOnOutOfMemoryError -XX:+PrintGCDetails" - val EXPECTED_JAVA_OPTS = "-XX\\:+HeapDumpOnOutOfMemoryError -XX\\:+PrintGCDetails" val submissionClient = new Client( - submissionSteps, - new SparkConf(false) - .set(org.apache.spark.internal.config.DRIVER_JAVA_OPTIONS, EXAMPLE_JAVA_OPTS), + driverBuilder, + kubernetesConf, kubernetesClient, false, "spark", loggingPodStatusWatcher, KUBERNETES_RESOURCE_PREFIX) submissionClient.run() - val createdPod = createdPodArgumentCaptor.getValue val otherCreatedResources = createdResourcesArgumentCaptor.getAllValues assert(otherCreatedResources.size === 2) - val secrets = otherCreatedResources.toArray - .filter(_.isInstanceOf[Secret]).map(_.asInstanceOf[Secret]) + val secrets = otherCreatedResources.toArray.filter(_.isInstanceOf[Secret]).toSeq + assert(secrets === ADDITIONAL_RESOURCES_WITH_OWNER_REFERENCES) val configMaps = otherCreatedResources.toArray .filter(_.isInstanceOf[ConfigMap]).map(_.asInstanceOf[ConfigMap]) assert(secrets.nonEmpty) - val secret = secrets.head - assert(secret.getMetadata.getName === 
FirstTestConfigurationStep.secretName) - assert(secret.getData.asScala === - Map(FirstTestConfigurationStep.secretKey -> FirstTestConfigurationStep.secretData)) - val ownerReference = Iterables.getOnlyElement(secret.getMetadata.getOwnerReferences) - assert(ownerReference.getName === createdPod.getMetadata.getName) - assert(ownerReference.getKind === DRIVER_POD_KIND) - assert(ownerReference.getUid === DRIVER_POD_UID) - assert(ownerReference.getApiVersion === DRIVER_POD_API_VERSION) assert(configMaps.nonEmpty) val configMap = configMaps.head assert(configMap.getMetadata.getName === s"$KUBERNETES_RESOURCE_PREFIX-driver-conf-map") assert(configMap.getData.containsKey(SPARK_CONF_FILE_NAME)) - assert(configMap.getData.get(SPARK_CONF_FILE_NAME).contains(EXPECTED_JAVA_OPTS)) - assert(configMap.getData.get(SPARK_CONF_FILE_NAME).contains( - "spark.custom-conf=custom-conf-value")) - val driverContainer = Iterables.getOnlyElement(createdPod.getSpec.getContainers) - assert(driverContainer.getName === SecondTestConfigurationStep.containerName) - val driverEnv = driverContainer.getEnv.asScala.head - assert(driverEnv.getName === ENV_SPARK_CONF_DIR) - assert(driverEnv.getValue === SPARK_CONF_DIR_INTERNAL) - val driverMount = driverContainer.getVolumeMounts.asScala.head - assert(driverMount.getName === SPARK_CONF_VOLUME) - assert(driverMount.getMountPath === SPARK_CONF_DIR_INTERNAL) + assert(configMap.getData.get(SPARK_CONF_FILE_NAME).contains("conf1key=conf1value")) + assert(configMap.getData.get(SPARK_CONF_FILE_NAME).contains("conf2key=conf2value")) } test("Waiting for app completion should stall on the watcher") { val submissionClient = new Client( - submissionSteps, - new SparkConf(false), + driverBuilder, + kubernetesConf, kubernetesClient, true, "spark", @@ -171,56 +207,4 @@ class ClientSuite extends SparkFunSuite with BeforeAndAfter { submissionClient.run() verify(loggingPodStatusWatcher).awaitCompletion() } - -} - -private object FirstTestConfigurationStep extends DriverConfigurationStep { - - val podName = "test-pod" - val secretName = "test-secret" - val labelKey = "first-submit" - val labelValue = "true" - val secretKey = "secretKey" - val secretData = "secretData" - - override def configureDriver(driverSpec: KubernetesDriverSpec): KubernetesDriverSpec = { - val modifiedPod = new PodBuilder(driverSpec.driverPod) - .editMetadata() - .withName(podName) - .addToLabels(labelKey, labelValue) - .endMetadata() - .build() - val additionalResource = new SecretBuilder() - .withNewMetadata() - .withName(secretName) - .endMetadata() - .addToData(secretKey, secretData) - .build() - driverSpec.copy( - driverPod = modifiedPod, - otherKubernetesResources = driverSpec.otherKubernetesResources ++ Seq(additionalResource)) - } -} - -private object SecondTestConfigurationStep extends DriverConfigurationStep { - val annotationKey = "second-submit" - val annotationValue = "submitted" - val sparkConfKey = "spark.custom-conf" - val sparkConfValue = "custom-conf-value" - val containerName = "driverContainer" - override def configureDriver(driverSpec: KubernetesDriverSpec): KubernetesDriverSpec = { - val modifiedPod = new PodBuilder(driverSpec.driverPod) - .editMetadata() - .addToAnnotations(annotationKey, annotationValue) - .endMetadata() - .build() - val resolvedSparkConf = driverSpec.driverSparkConf.clone().set(sparkConfKey, sparkConfValue) - val modifiedContainer = new ContainerBuilder(driverSpec.driverContainer) - .withName(containerName) - .build() - driverSpec.copy( - driverPod = modifiedPod, - driverSparkConf = 
resolvedSparkConf, - driverContainer = modifiedContainer) - } } diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/submit/DriverConfigOrchestratorSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/submit/DriverConfigOrchestratorSuite.scala deleted file mode 100644 index df34d2dbcb5be..0000000000000 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/submit/DriverConfigOrchestratorSuite.scala +++ /dev/null @@ -1,131 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.spark.deploy.k8s.submit - -import org.apache.spark.{SparkConf, SparkException, SparkFunSuite} -import org.apache.spark.deploy.k8s.Config._ -import org.apache.spark.deploy.k8s.submit.steps._ - -class DriverConfigOrchestratorSuite extends SparkFunSuite { - - private val DRIVER_IMAGE = "driver-image" - private val IC_IMAGE = "init-container-image" - private val APP_ID = "spark-app-id" - private val KUBERNETES_RESOURCE_PREFIX = "example-prefix" - private val APP_NAME = "spark" - private val MAIN_CLASS = "org.apache.spark.examples.SparkPi" - private val APP_ARGS = Array("arg1", "arg2") - private val SECRET_FOO = "foo" - private val SECRET_BAR = "bar" - private val SECRET_MOUNT_PATH = "/etc/secrets/driver" - - test("Base submission steps with a main app resource.") { - val sparkConf = new SparkConf(false).set(CONTAINER_IMAGE, DRIVER_IMAGE) - val mainAppResource = JavaMainAppResource("local:///var/apps/jars/main.jar") - val orchestrator = new DriverConfigOrchestrator( - APP_ID, - KUBERNETES_RESOURCE_PREFIX, - Some(mainAppResource), - APP_NAME, - MAIN_CLASS, - APP_ARGS, - sparkConf) - validateStepTypes( - orchestrator, - classOf[BasicDriverConfigurationStep], - classOf[DriverServiceBootstrapStep], - classOf[DriverKubernetesCredentialsStep], - classOf[DependencyResolutionStep]) - } - - test("Base submission steps without a main app resource.") { - val sparkConf = new SparkConf(false).set(CONTAINER_IMAGE, DRIVER_IMAGE) - val orchestrator = new DriverConfigOrchestrator( - APP_ID, - KUBERNETES_RESOURCE_PREFIX, - Option.empty, - APP_NAME, - MAIN_CLASS, - APP_ARGS, - sparkConf) - validateStepTypes( - orchestrator, - classOf[BasicDriverConfigurationStep], - classOf[DriverServiceBootstrapStep], - classOf[DriverKubernetesCredentialsStep]) - } - - test("Submission steps with driver secrets to mount") { - val sparkConf = new SparkConf(false) - .set(CONTAINER_IMAGE, DRIVER_IMAGE) - .set(s"$KUBERNETES_DRIVER_SECRETS_PREFIX$SECRET_FOO", SECRET_MOUNT_PATH) - .set(s"$KUBERNETES_DRIVER_SECRETS_PREFIX$SECRET_BAR", SECRET_MOUNT_PATH) - val mainAppResource = JavaMainAppResource("local:///var/apps/jars/main.jar") - val orchestrator = new DriverConfigOrchestrator( - APP_ID, - 
KUBERNETES_RESOURCE_PREFIX, - Some(mainAppResource), - APP_NAME, - MAIN_CLASS, - APP_ARGS, - sparkConf) - validateStepTypes( - orchestrator, - classOf[BasicDriverConfigurationStep], - classOf[DriverServiceBootstrapStep], - classOf[DriverKubernetesCredentialsStep], - classOf[DependencyResolutionStep], - classOf[DriverMountSecretsStep]) - } - - test("Submission using client local dependencies") { - val sparkConf = new SparkConf(false) - .set(CONTAINER_IMAGE, DRIVER_IMAGE) - var orchestrator = new DriverConfigOrchestrator( - APP_ID, - KUBERNETES_RESOURCE_PREFIX, - Some(JavaMainAppResource("file:///var/apps/jars/main.jar")), - APP_NAME, - MAIN_CLASS, - APP_ARGS, - sparkConf) - assertThrows[SparkException] { - orchestrator.getAllConfigurationSteps - } - - sparkConf.set("spark.files", "/path/to/file1,/path/to/file2") - orchestrator = new DriverConfigOrchestrator( - APP_ID, - KUBERNETES_RESOURCE_PREFIX, - Some(JavaMainAppResource("local:///var/apps/jars/main.jar")), - APP_NAME, - MAIN_CLASS, - APP_ARGS, - sparkConf) - assertThrows[SparkException] { - orchestrator.getAllConfigurationSteps - } - } - - private def validateStepTypes( - orchestrator: DriverConfigOrchestrator, - types: Class[_ <: DriverConfigurationStep]*): Unit = { - val steps = orchestrator.getAllConfigurationSteps - assert(steps.size === types.size) - assert(steps.map(_.getClass) === types) - } -} diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/submit/KubernetesDriverBuilderSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/submit/KubernetesDriverBuilderSuite.scala new file mode 100644 index 0000000000000..161f9afe7bba9 --- /dev/null +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/submit/KubernetesDriverBuilderSuite.scala @@ -0,0 +1,102 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.spark.deploy.k8s.submit + +import org.apache.spark.{SparkConf, SparkFunSuite} +import org.apache.spark.deploy.k8s.{KubernetesConf, KubernetesDriverSpec, KubernetesDriverSpecificConf} +import org.apache.spark.deploy.k8s.features.{BasicDriverFeatureStep, DriverKubernetesCredentialsFeatureStep, DriverServiceFeatureStep, KubernetesFeaturesTestUtils, MountSecretsFeatureStep} + +class KubernetesDriverBuilderSuite extends SparkFunSuite { + + private val BASIC_STEP_TYPE = "basic" + private val CREDENTIALS_STEP_TYPE = "credentials" + private val SERVICE_STEP_TYPE = "service" + private val SECRETS_STEP_TYPE = "mount-secrets" + + private val basicFeatureStep = KubernetesFeaturesTestUtils.getMockConfigStepForStepType( + BASIC_STEP_TYPE, classOf[BasicDriverFeatureStep]) + + private val credentialsStep = KubernetesFeaturesTestUtils.getMockConfigStepForStepType( + CREDENTIALS_STEP_TYPE, classOf[DriverKubernetesCredentialsFeatureStep]) + + private val serviceStep = KubernetesFeaturesTestUtils.getMockConfigStepForStepType( + SERVICE_STEP_TYPE, classOf[DriverServiceFeatureStep]) + + private val secretsStep = KubernetesFeaturesTestUtils.getMockConfigStepForStepType( + SECRETS_STEP_TYPE, classOf[MountSecretsFeatureStep]) + + private val builderUnderTest: KubernetesDriverBuilder = + new KubernetesDriverBuilder( + _ => basicFeatureStep, + _ => credentialsStep, + _ => serviceStep, + _ => secretsStep) + + test("Apply fundamental steps all the time.") { + val conf = KubernetesConf( + new SparkConf(false), + KubernetesDriverSpecificConf( + None, + "test-app", + "main", + Seq.empty), + "prefix", + "appId", + Map.empty, + Map.empty, + Map.empty, + Map.empty) + validateStepTypesApplied( + builderUnderTest.buildFromFeatures(conf), + BASIC_STEP_TYPE, + CREDENTIALS_STEP_TYPE, + SERVICE_STEP_TYPE) + } + + test("Apply secrets step if secrets are present.") { + val conf = KubernetesConf( + new SparkConf(false), + KubernetesDriverSpecificConf( + None, + "test-app", + "main", + Seq.empty), + "prefix", + "appId", + Map.empty, + Map.empty, + Map("secret" -> "secretMountPath"), + Map.empty) + validateStepTypesApplied( + builderUnderTest.buildFromFeatures(conf), + BASIC_STEP_TYPE, + CREDENTIALS_STEP_TYPE, + SERVICE_STEP_TYPE, + SECRETS_STEP_TYPE) + } + + private def validateStepTypesApplied(resolvedSpec: KubernetesDriverSpec, stepTypes: String*) + : Unit = { + assert(resolvedSpec.systemProperties.size === stepTypes.size) + stepTypes.foreach { stepType => + assert(resolvedSpec.pod.pod.getMetadata.getLabels.get(stepType) === stepType) + assert(resolvedSpec.driverKubernetesResources.containsSlice( + KubernetesFeaturesTestUtils.getSecretsForStepType(stepType))) + assert(resolvedSpec.systemProperties(stepType) === stepType) + } + } +} diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/submit/steps/BasicDriverConfigurationStepSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/submit/steps/BasicDriverConfigurationStepSuite.scala deleted file mode 100644 index ee450fff8d376..0000000000000 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/submit/steps/BasicDriverConfigurationStepSuite.scala +++ /dev/null @@ -1,122 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. 
- * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.spark.deploy.k8s.submit.steps - -import scala.collection.JavaConverters._ - -import io.fabric8.kubernetes.api.model.{ContainerBuilder, HasMetadata, PodBuilder} - -import org.apache.spark.{SparkConf, SparkFunSuite} -import org.apache.spark.deploy.k8s.Config._ -import org.apache.spark.deploy.k8s.Constants._ -import org.apache.spark.deploy.k8s.submit.KubernetesDriverSpec - -class BasicDriverConfigurationStepSuite extends SparkFunSuite { - - private val APP_ID = "spark-app-id" - private val RESOURCE_NAME_PREFIX = "spark" - private val DRIVER_LABELS = Map("labelkey" -> "labelvalue") - private val CONTAINER_IMAGE_PULL_POLICY = "IfNotPresent" - private val APP_NAME = "spark-test" - private val MAIN_CLASS = "org.apache.spark.examples.SparkPi" - private val APP_ARGS = Array("arg1", "arg2", "\"arg 3\"") - private val CUSTOM_ANNOTATION_KEY = "customAnnotation" - private val CUSTOM_ANNOTATION_VALUE = "customAnnotationValue" - private val DRIVER_CUSTOM_ENV_KEY1 = "customDriverEnv1" - private val DRIVER_CUSTOM_ENV_KEY2 = "customDriverEnv2" - - test("Set all possible configurations from the user.") { - val sparkConf = new SparkConf() - .set(KUBERNETES_DRIVER_POD_NAME, "spark-driver-pod") - .set(org.apache.spark.internal.config.DRIVER_CLASS_PATH, "/opt/spark/spark-examples.jar") - .set("spark.driver.cores", "2") - .set(KUBERNETES_DRIVER_LIMIT_CORES, "4") - .set(org.apache.spark.internal.config.DRIVER_MEMORY.key, "256M") - .set(org.apache.spark.internal.config.DRIVER_MEMORY_OVERHEAD, 200L) - .set(CONTAINER_IMAGE, "spark-driver:latest") - .set(s"$KUBERNETES_DRIVER_ANNOTATION_PREFIX$CUSTOM_ANNOTATION_KEY", CUSTOM_ANNOTATION_VALUE) - .set(s"$KUBERNETES_DRIVER_ENV_KEY$DRIVER_CUSTOM_ENV_KEY1", "customDriverEnv1") - .set(s"$KUBERNETES_DRIVER_ENV_KEY$DRIVER_CUSTOM_ENV_KEY2", "customDriverEnv2") - .set(IMAGE_PULL_SECRETS, "imagePullSecret1, imagePullSecret2") - - val submissionStep = new BasicDriverConfigurationStep( - APP_ID, - RESOURCE_NAME_PREFIX, - DRIVER_LABELS, - CONTAINER_IMAGE_PULL_POLICY, - APP_NAME, - MAIN_CLASS, - APP_ARGS, - sparkConf) - val basePod = new PodBuilder().withNewMetadata().endMetadata().withNewSpec().endSpec().build() - val baseDriverSpec = KubernetesDriverSpec( - driverPod = basePod, - driverContainer = new ContainerBuilder().build(), - driverSparkConf = new SparkConf(false), - otherKubernetesResources = Seq.empty[HasMetadata]) - val preparedDriverSpec = submissionStep.configureDriver(baseDriverSpec) - - assert(preparedDriverSpec.driverContainer.getName === DRIVER_CONTAINER_NAME) - assert(preparedDriverSpec.driverContainer.getImage === "spark-driver:latest") - assert(preparedDriverSpec.driverContainer.getImagePullPolicy === CONTAINER_IMAGE_PULL_POLICY) - - assert(preparedDriverSpec.driverContainer.getEnv.size === 4) - val envs = preparedDriverSpec.driverContainer - .getEnv - .asScala - .map(env => (env.getName, env.getValue)) - .toMap - assert(envs(ENV_CLASSPATH) === 
"/opt/spark/spark-examples.jar") - assert(envs(DRIVER_CUSTOM_ENV_KEY1) === "customDriverEnv1") - assert(envs(DRIVER_CUSTOM_ENV_KEY2) === "customDriverEnv2") - - assert(preparedDriverSpec.driverContainer.getEnv.asScala.exists(envVar => - envVar.getName.equals(ENV_DRIVER_BIND_ADDRESS) && - envVar.getValueFrom.getFieldRef.getApiVersion.equals("v1") && - envVar.getValueFrom.getFieldRef.getFieldPath.equals("status.podIP"))) - - val resourceRequirements = preparedDriverSpec.driverContainer.getResources - val requests = resourceRequirements.getRequests.asScala - assert(requests("cpu").getAmount === "2") - assert(requests("memory").getAmount === "456Mi") - val limits = resourceRequirements.getLimits.asScala - assert(limits("memory").getAmount === "456Mi") - assert(limits("cpu").getAmount === "4") - - val driverPodMetadata = preparedDriverSpec.driverPod.getMetadata - assert(driverPodMetadata.getName === "spark-driver-pod") - assert(driverPodMetadata.getLabels.asScala === DRIVER_LABELS) - val expectedAnnotations = Map( - CUSTOM_ANNOTATION_KEY -> CUSTOM_ANNOTATION_VALUE, - SPARK_APP_NAME_ANNOTATION -> APP_NAME) - assert(driverPodMetadata.getAnnotations.asScala === expectedAnnotations) - - val driverPodSpec = preparedDriverSpec.driverPod.getSpec - assert(driverPodSpec.getRestartPolicy === "Never") - assert(driverPodSpec.getImagePullSecrets.size() === 2) - assert(driverPodSpec.getImagePullSecrets.get(0).getName === "imagePullSecret1") - assert(driverPodSpec.getImagePullSecrets.get(1).getName === "imagePullSecret2") - - val resolvedSparkConf = preparedDriverSpec.driverSparkConf.getAll.toMap - val expectedSparkConf = Map( - KUBERNETES_DRIVER_POD_NAME.key -> "spark-driver-pod", - "spark.app.id" -> APP_ID, - KUBERNETES_EXECUTOR_POD_NAME_PREFIX.key -> RESOURCE_NAME_PREFIX, - "spark.kubernetes.submitInDriver" -> "true") - assert(resolvedSparkConf === expectedSparkConf) - } -} diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/submit/steps/DependencyResolutionStepSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/submit/steps/DependencyResolutionStepSuite.scala deleted file mode 100644 index ca43fc97dc991..0000000000000 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/submit/steps/DependencyResolutionStepSuite.scala +++ /dev/null @@ -1,69 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.spark.deploy.k8s.submit.steps - -import java.io.File - -import scala.collection.JavaConverters._ - -import io.fabric8.kubernetes.api.model.{ContainerBuilder, HasMetadata, PodBuilder} - -import org.apache.spark.{SparkConf, SparkFunSuite} -import org.apache.spark.deploy.k8s.Constants._ -import org.apache.spark.deploy.k8s.submit.KubernetesDriverSpec - -class DependencyResolutionStepSuite extends SparkFunSuite { - - private val SPARK_JARS = Seq( - "apps/jars/jar1.jar", - "local:///var/apps/jars/jar2.jar") - - private val SPARK_FILES = Seq( - "apps/files/file1.txt", - "local:///var/apps/files/file2.txt") - - test("Added dependencies should be resolved in Spark configuration and environment") { - val dependencyResolutionStep = new DependencyResolutionStep( - SPARK_JARS, - SPARK_FILES) - val driverPod = new PodBuilder().build() - val baseDriverSpec = KubernetesDriverSpec( - driverPod = driverPod, - driverContainer = new ContainerBuilder().build(), - driverSparkConf = new SparkConf(false), - otherKubernetesResources = Seq.empty[HasMetadata]) - val preparedDriverSpec = dependencyResolutionStep.configureDriver(baseDriverSpec) - assert(preparedDriverSpec.driverPod === driverPod) - assert(preparedDriverSpec.otherKubernetesResources.isEmpty) - val resolvedSparkJars = preparedDriverSpec.driverSparkConf.get("spark.jars").split(",").toSet - val expectedResolvedSparkJars = Set( - "apps/jars/jar1.jar", - "/var/apps/jars/jar2.jar") - assert(resolvedSparkJars === expectedResolvedSparkJars) - val resolvedSparkFiles = preparedDriverSpec.driverSparkConf.get("spark.files").split(",").toSet - val expectedResolvedSparkFiles = Set( - "apps/files/file1.txt", - "/var/apps/files/file2.txt") - assert(resolvedSparkFiles === expectedResolvedSparkFiles) - val driverEnv = preparedDriverSpec.driverContainer.getEnv.asScala - assert(driverEnv.size === 1) - assert(driverEnv.head.getName === ENV_MOUNTED_CLASSPATH) - val resolvedDriverClasspath = driverEnv.head.getValue.split(File.pathSeparator).toSet - val expectedResolvedDriverClasspath = expectedResolvedSparkJars - assert(resolvedDriverClasspath === expectedResolvedDriverClasspath) - } -} diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/submit/steps/DriverServiceBootstrapStepSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/submit/steps/DriverServiceBootstrapStepSuite.scala deleted file mode 100644 index 78c8c3ba1afbd..0000000000000 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/submit/steps/DriverServiceBootstrapStepSuite.scala +++ /dev/null @@ -1,180 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.spark.deploy.k8s.submit.steps - -import scala.collection.JavaConverters._ - -import io.fabric8.kubernetes.api.model.Service -import org.mockito.{Mock, MockitoAnnotations} -import org.mockito.Mockito.when -import org.scalatest.BeforeAndAfter - -import org.apache.spark.{SparkConf, SparkFunSuite} -import org.apache.spark.deploy.k8s.Config._ -import org.apache.spark.deploy.k8s.Constants._ -import org.apache.spark.deploy.k8s.submit.KubernetesDriverSpec -import org.apache.spark.util.Clock - -class DriverServiceBootstrapStepSuite extends SparkFunSuite with BeforeAndAfter { - - private val SHORT_RESOURCE_NAME_PREFIX = - "a" * (DriverServiceBootstrapStep.MAX_SERVICE_NAME_LENGTH - - DriverServiceBootstrapStep.DRIVER_SVC_POSTFIX.length) - - private val LONG_RESOURCE_NAME_PREFIX = - "a" * (DriverServiceBootstrapStep.MAX_SERVICE_NAME_LENGTH - - DriverServiceBootstrapStep.DRIVER_SVC_POSTFIX.length + 1) - private val DRIVER_LABELS = Map( - "label1key" -> "label1value", - "label2key" -> "label2value") - - @Mock - private var clock: Clock = _ - - private var sparkConf: SparkConf = _ - - before { - MockitoAnnotations.initMocks(this) - sparkConf = new SparkConf(false) - } - - test("Headless service has a port for the driver RPC and the block manager.") { - val configurationStep = new DriverServiceBootstrapStep( - SHORT_RESOURCE_NAME_PREFIX, - DRIVER_LABELS, - sparkConf - .set("spark.driver.port", "9000") - .set(org.apache.spark.internal.config.DRIVER_BLOCK_MANAGER_PORT, 8080), - clock) - val baseDriverSpec = KubernetesDriverSpec.initialSpec(sparkConf.clone()) - val resolvedDriverSpec = configurationStep.configureDriver(baseDriverSpec) - assert(resolvedDriverSpec.otherKubernetesResources.size === 1) - assert(resolvedDriverSpec.otherKubernetesResources.head.isInstanceOf[Service]) - val driverService = resolvedDriverSpec.otherKubernetesResources.head.asInstanceOf[Service] - verifyService( - 9000, - 8080, - s"$SHORT_RESOURCE_NAME_PREFIX${DriverServiceBootstrapStep.DRIVER_SVC_POSTFIX}", - driverService) - } - - test("Hostname and ports are set according to the service name.") { - val configurationStep = new DriverServiceBootstrapStep( - SHORT_RESOURCE_NAME_PREFIX, - DRIVER_LABELS, - sparkConf - .set("spark.driver.port", "9000") - .set(org.apache.spark.internal.config.DRIVER_BLOCK_MANAGER_PORT, 8080) - .set(KUBERNETES_NAMESPACE, "my-namespace"), - clock) - val baseDriverSpec = KubernetesDriverSpec.initialSpec(sparkConf.clone()) - val resolvedDriverSpec = configurationStep.configureDriver(baseDriverSpec) - val expectedServiceName = SHORT_RESOURCE_NAME_PREFIX + - DriverServiceBootstrapStep.DRIVER_SVC_POSTFIX - val expectedHostName = s"$expectedServiceName.my-namespace.svc" - verifySparkConfHostNames(resolvedDriverSpec.driverSparkConf, expectedHostName) - } - - test("Ports should resolve to defaults in SparkConf and in the service.") { - val configurationStep = new DriverServiceBootstrapStep( - SHORT_RESOURCE_NAME_PREFIX, - DRIVER_LABELS, - sparkConf, - clock) - val baseDriverSpec = KubernetesDriverSpec.initialSpec(sparkConf.clone()) - val resolvedDriverSpec = configurationStep.configureDriver(baseDriverSpec) - verifyService( - DEFAULT_DRIVER_PORT, - DEFAULT_BLOCKMANAGER_PORT, - s"$SHORT_RESOURCE_NAME_PREFIX${DriverServiceBootstrapStep.DRIVER_SVC_POSTFIX}", - resolvedDriverSpec.otherKubernetesResources.head.asInstanceOf[Service]) - assert(resolvedDriverSpec.driverSparkConf.get("spark.driver.port") === - DEFAULT_DRIVER_PORT.toString) - assert(resolvedDriverSpec.driverSparkConf.get( - 
org.apache.spark.internal.config.DRIVER_BLOCK_MANAGER_PORT) === DEFAULT_BLOCKMANAGER_PORT) - } - - test("Long prefixes should switch to using a generated name.") { - val configurationStep = new DriverServiceBootstrapStep( - LONG_RESOURCE_NAME_PREFIX, - DRIVER_LABELS, - sparkConf.set(KUBERNETES_NAMESPACE, "my-namespace"), - clock) - when(clock.getTimeMillis()).thenReturn(10000) - val baseDriverSpec = KubernetesDriverSpec.initialSpec(sparkConf.clone()) - val resolvedDriverSpec = configurationStep.configureDriver(baseDriverSpec) - val driverService = resolvedDriverSpec.otherKubernetesResources.head.asInstanceOf[Service] - val expectedServiceName = s"spark-10000${DriverServiceBootstrapStep.DRIVER_SVC_POSTFIX}" - assert(driverService.getMetadata.getName === expectedServiceName) - val expectedHostName = s"$expectedServiceName.my-namespace.svc" - verifySparkConfHostNames(resolvedDriverSpec.driverSparkConf, expectedHostName) - } - - test("Disallow bind address and driver host to be set explicitly.") { - val configurationStep = new DriverServiceBootstrapStep( - LONG_RESOURCE_NAME_PREFIX, - DRIVER_LABELS, - sparkConf.set(org.apache.spark.internal.config.DRIVER_BIND_ADDRESS, "host"), - clock) - try { - configurationStep.configureDriver(KubernetesDriverSpec.initialSpec(sparkConf)) - fail("The driver bind address should not be allowed.") - } catch { - case e: Throwable => - assert(e.getMessage === - s"requirement failed: ${DriverServiceBootstrapStep.DRIVER_BIND_ADDRESS_KEY} is" + - " not supported in Kubernetes mode, as the driver's bind address is managed" + - " and set to the driver pod's IP address.") - } - sparkConf.remove(org.apache.spark.internal.config.DRIVER_BIND_ADDRESS) - sparkConf.set(org.apache.spark.internal.config.DRIVER_HOST_ADDRESS, "host") - try { - configurationStep.configureDriver(KubernetesDriverSpec.initialSpec(sparkConf)) - fail("The driver host address should not be allowed.") - } catch { - case e: Throwable => - assert(e.getMessage === - s"requirement failed: ${DriverServiceBootstrapStep.DRIVER_HOST_KEY} is" + - " not supported in Kubernetes mode, as the driver's hostname will be managed via" + - " a Kubernetes service.") - } - } - - private def verifyService( - driverPort: Int, - blockManagerPort: Int, - expectedServiceName: String, - service: Service): Unit = { - assert(service.getMetadata.getName === expectedServiceName) - assert(service.getSpec.getClusterIP === "None") - assert(service.getSpec.getSelector.asScala === DRIVER_LABELS) - assert(service.getSpec.getPorts.size() === 2) - val driverServicePorts = service.getSpec.getPorts.asScala - assert(driverServicePorts.head.getName === DRIVER_PORT_NAME) - assert(driverServicePorts.head.getPort.intValue() === driverPort) - assert(driverServicePorts.head.getTargetPort.getIntVal === driverPort) - assert(driverServicePorts(1).getName === BLOCK_MANAGER_PORT_NAME) - assert(driverServicePorts(1).getPort.intValue() === blockManagerPort) - assert(driverServicePorts(1).getTargetPort.getIntVal === blockManagerPort) - } - - private def verifySparkConfHostNames( - driverSparkConf: SparkConf, expectedHostName: String): Unit = { - assert(driverSparkConf.get( - org.apache.spark.internal.config.DRIVER_HOST_ADDRESS) === expectedHostName) - } -} diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodFactorySuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodFactorySuite.scala deleted file mode 100644 index 
d73df20f0f956..0000000000000 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodFactorySuite.scala +++ /dev/null @@ -1,195 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.spark.scheduler.cluster.k8s - -import scala.collection.JavaConverters._ - -import io.fabric8.kubernetes.api.model._ -import org.mockito.MockitoAnnotations -import org.scalatest.{BeforeAndAfter, BeforeAndAfterEach} - -import org.apache.spark.{SparkConf, SparkFunSuite} -import org.apache.spark.deploy.k8s.Config._ -import org.apache.spark.deploy.k8s.Constants._ -import org.apache.spark.deploy.k8s.MountSecretsBootstrap - -class ExecutorPodFactorySuite extends SparkFunSuite with BeforeAndAfter with BeforeAndAfterEach { - - private val driverPodName: String = "driver-pod" - private val driverPodUid: String = "driver-uid" - private val executorPrefix: String = "base" - private val executorImage: String = "executor-image" - private val imagePullSecrets: String = "imagePullSecret1, imagePullSecret2" - private val driverPod = new PodBuilder() - .withNewMetadata() - .withName(driverPodName) - .withUid(driverPodUid) - .endMetadata() - .withNewSpec() - .withNodeName("some-node") - .endSpec() - .withNewStatus() - .withHostIP("192.168.99.100") - .endStatus() - .build() - private var baseConf: SparkConf = _ - - before { - MockitoAnnotations.initMocks(this) - baseConf = new SparkConf() - .set(KUBERNETES_DRIVER_POD_NAME, driverPodName) - .set(KUBERNETES_EXECUTOR_POD_NAME_PREFIX, executorPrefix) - .set(CONTAINER_IMAGE, executorImage) - .set(KUBERNETES_DRIVER_SUBMIT_CHECK, true) - .set(IMAGE_PULL_SECRETS, imagePullSecrets) - } - - test("basic executor pod has reasonable defaults") { - val factory = new ExecutorPodFactory(baseConf, None) - val executor = factory.createExecutorPod( - "1", "dummy", "dummy", Seq[(String, String)](), driverPod, Map[String, Int]()) - - // The executor pod name and default labels. - assert(executor.getMetadata.getName === s"$executorPrefix-exec-1") - assert(executor.getMetadata.getLabels.size() === 3) - assert(executor.getMetadata.getLabels.get(SPARK_EXECUTOR_ID_LABEL) === "1") - - // There is exactly 1 container with no volume mounts and default memory limits and requests. - // Default memory limit/request is 1024M + 384M (minimum overhead constant). 
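For context on the "1408Mi" assertions that follow: the figure is the default executor memory plus the minimum non-heap overhead mentioned in the comment above. A hedged sketch of the arithmetic, with illustrative constant names rather than Spark's:

    val executorMemoryMiB = 1024  // spark.executor.memory default, in MiB
    val minOverheadMiB = 384      // the minimum overhead constant
    // assumption: the overhead factor is 10% of executor memory, floored at the minimum
    val overheadMiB = math.max((0.10 * executorMemoryMiB).toInt, minOverheadMiB)
    val podMemoryMiB = executorMemoryMiB + overheadMiB  // 1024 + 384 = 1408 -> "1408Mi"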
- assert(executor.getSpec.getContainers.size() === 1) - assert(executor.getSpec.getContainers.get(0).getImage === executorImage) - assert(executor.getSpec.getContainers.get(0).getVolumeMounts.isEmpty) - assert(executor.getSpec.getContainers.get(0).getResources.getLimits.size() === 1) - assert(executor.getSpec.getContainers.get(0).getResources - .getRequests.get("memory").getAmount === "1408Mi") - assert(executor.getSpec.getContainers.get(0).getResources - .getLimits.get("memory").getAmount === "1408Mi") - assert(executor.getSpec.getImagePullSecrets.size() === 2) - assert(executor.getSpec.getImagePullSecrets.get(0).getName === "imagePullSecret1") - assert(executor.getSpec.getImagePullSecrets.get(1).getName === "imagePullSecret2") - - // The pod has no node selector, volumes. - assert(executor.getSpec.getNodeSelector.isEmpty) - assert(executor.getSpec.getVolumes.isEmpty) - - checkEnv(executor, Map()) - checkOwnerReferences(executor, driverPodUid) - } - - test("executor core request specification") { - var factory = new ExecutorPodFactory(baseConf, None) - var executor = factory.createExecutorPod( - "1", "dummy", "dummy", Seq[(String, String)](), driverPod, Map[String, Int]()) - assert(executor.getSpec.getContainers.size() === 1) - assert(executor.getSpec.getContainers.get(0).getResources.getRequests.get("cpu").getAmount - === "1") - - val conf = baseConf.clone() - - conf.set(KUBERNETES_EXECUTOR_REQUEST_CORES, "0.1") - factory = new ExecutorPodFactory(conf, None) - executor = factory.createExecutorPod( - "1", "dummy", "dummy", Seq[(String, String)](), driverPod, Map[String, Int]()) - assert(executor.getSpec.getContainers.size() === 1) - assert(executor.getSpec.getContainers.get(0).getResources.getRequests.get("cpu").getAmount - === "0.1") - - conf.set(KUBERNETES_EXECUTOR_REQUEST_CORES, "100m") - factory = new ExecutorPodFactory(conf, None) - conf.set(KUBERNETES_EXECUTOR_REQUEST_CORES, "100m") - executor = factory.createExecutorPod( - "1", "dummy", "dummy", Seq[(String, String)](), driverPod, Map[String, Int]()) - assert(executor.getSpec.getContainers.get(0).getResources.getRequests.get("cpu").getAmount - === "100m") - } - - test("executor pod hostnames get truncated to 63 characters") { - val conf = baseConf.clone() - conf.set(KUBERNETES_EXECUTOR_POD_NAME_PREFIX, - "loremipsumdolorsitametvimatelitrefficiendisuscipianturvixlegeresple") - - val factory = new ExecutorPodFactory(conf, None) - val executor = factory.createExecutorPod( - "1", "dummy", "dummy", Seq[(String, String)](), driverPod, Map[String, Int]()) - - assert(executor.getSpec.getHostname.length === 63) - } - - test("classpath and extra java options get translated into environment variables") { - val conf = baseConf.clone() - conf.set(org.apache.spark.internal.config.EXECUTOR_JAVA_OPTIONS, "foo=bar") - conf.set(org.apache.spark.internal.config.EXECUTOR_CLASS_PATH, "bar=baz") - - val factory = new ExecutorPodFactory(conf, None) - val executor = factory.createExecutorPod( - "1", "dummy", "dummy", Seq[(String, String)]("qux" -> "quux"), driverPod, Map[String, Int]()) - - checkEnv(executor, - Map("SPARK_JAVA_OPT_0" -> "foo=bar", - ENV_CLASSPATH -> "bar=baz", - "qux" -> "quux")) - checkOwnerReferences(executor, driverPodUid) - } - - test("executor secrets get mounted") { - val conf = baseConf.clone() - - val secretsBootstrap = new MountSecretsBootstrap(Map("secret1" -> "/var/secret1")) - val factory = new ExecutorPodFactory(conf, Some(secretsBootstrap)) - val executor = factory.createExecutorPod( - "1", "dummy", "dummy", Seq[(String, 
String)](), driverPod, Map[String, Int]()) - - assert(executor.getSpec.getContainers.size() === 1) - assert(executor.getSpec.getContainers.get(0).getVolumeMounts.size() === 1) - assert(executor.getSpec.getContainers.get(0).getVolumeMounts.get(0).getName - === "secret1-volume") - assert(executor.getSpec.getContainers.get(0).getVolumeMounts.get(0) - .getMountPath === "/var/secret1") - - // check volume mounted. - assert(executor.getSpec.getVolumes.size() === 1) - assert(executor.getSpec.getVolumes.get(0).getSecret.getSecretName === "secret1") - - checkOwnerReferences(executor, driverPodUid) - } - - // There is always exactly one controller reference, and it points to the driver pod. - private def checkOwnerReferences(executor: Pod, driverPodUid: String): Unit = { - assert(executor.getMetadata.getOwnerReferences.size() === 1) - assert(executor.getMetadata.getOwnerReferences.get(0).getUid === driverPodUid) - assert(executor.getMetadata.getOwnerReferences.get(0).getController === true) - } - - // Check that the expected environment variables are present. - private def checkEnv(executor: Pod, additionalEnvVars: Map[String, String]): Unit = { - val defaultEnvs = Map( - ENV_EXECUTOR_ID -> "1", - ENV_DRIVER_URL -> "dummy", - ENV_EXECUTOR_CORES -> "1", - ENV_EXECUTOR_MEMORY -> "1g", - ENV_APPLICATION_ID -> "dummy", - ENV_SPARK_CONF_DIR -> SPARK_CONF_DIR_INTERNAL, - ENV_EXECUTOR_POD_IP -> null) ++ additionalEnvVars - - assert(executor.getSpec.getContainers.size() === 1) - assert(executor.getSpec.getContainers.get(0).getEnv.size() === defaultEnvs.size) - val mapEnvs = executor.getSpec.getContainers.get(0).getEnv.asScala.map { - x => (x.getName, x.getValue) - }.toMap - assert(defaultEnvs === mapEnvs) - } -} diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/KubernetesClusterSchedulerBackendSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/KubernetesClusterSchedulerBackendSuite.scala index b2f26f205a329..96065e83f069c 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/KubernetesClusterSchedulerBackendSuite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/KubernetesClusterSchedulerBackendSuite.scala @@ -18,11 +18,12 @@ package org.apache.spark.scheduler.cluster.k8s import java.util.concurrent.{ExecutorService, ScheduledExecutorService, TimeUnit} -import io.fabric8.kubernetes.api.model.{DoneablePod, Pod, PodBuilder, PodList} +import io.fabric8.kubernetes.api.model.{ContainerBuilder, DoneablePod, Pod, PodBuilder, PodList} import io.fabric8.kubernetes.client.{KubernetesClient, Watch, Watcher} import io.fabric8.kubernetes.client.Watcher.Action import io.fabric8.kubernetes.client.dsl.{FilterWatchListDeletable, MixedOperation, NonNamespaceOperation, PodResource} -import org.mockito.{AdditionalAnswers, ArgumentCaptor, Mock, MockitoAnnotations} +import org.hamcrest.{BaseMatcher, Description, Matcher} +import org.mockito.{AdditionalAnswers, ArgumentCaptor, Matchers, Mock, MockitoAnnotations} import org.mockito.Matchers.{any, eq => mockitoEq} import org.mockito.Mockito.{doNothing, never, times, verify, when} import org.scalatest.BeforeAndAfter @@ -31,6 +32,7 @@ import scala.collection.JavaConverters._ import scala.concurrent.Future import org.apache.spark.{SparkConf, SparkContext, SparkFunSuite} +import org.apache.spark.deploy.k8s.{KubernetesConf, KubernetesExecutorSpecificConf, SparkPod} import 
org.apache.spark.deploy.k8s.Config._ import org.apache.spark.deploy.k8s.Constants._ import org.apache.spark.rpc._ @@ -47,8 +49,6 @@ class KubernetesClusterSchedulerBackendSuite extends SparkFunSuite with BeforeAn private val SPARK_DRIVER_HOST = "localhost" private val SPARK_DRIVER_PORT = 7077 private val POD_ALLOCATION_INTERVAL = "1m" - private val DRIVER_URL = RpcEndpointAddress( - SPARK_DRIVER_HOST, SPARK_DRIVER_PORT, CoarseGrainedSchedulerBackend.ENDPOINT_NAME).toString private val FIRST_EXECUTOR_POD = new PodBuilder() .withNewMetadata() .withName("pod1") @@ -94,7 +94,7 @@ class KubernetesClusterSchedulerBackendSuite extends SparkFunSuite with BeforeAn private var requestExecutorsService: ExecutorService = _ @Mock - private var executorPodFactory: ExecutorPodFactory = _ + private var executorBuilder: KubernetesExecutorBuilder = _ @Mock private var kubernetesClient: KubernetesClient = _ @@ -399,7 +399,7 @@ class KubernetesClusterSchedulerBackendSuite extends SparkFunSuite with BeforeAn new KubernetesClusterSchedulerBackend( taskSchedulerImpl, rpcEnv, - executorPodFactory, + executorBuilder, kubernetesClient, allocatorExecutor, requestExecutorsService) { @@ -428,13 +428,22 @@ class KubernetesClusterSchedulerBackendSuite extends SparkFunSuite with BeforeAn .addToLabels(SPARK_EXECUTOR_ID_LABEL, executorId.toString) .endMetadata() .build() - when(executorPodFactory.createExecutorPod( - executorId.toString, - APP_ID, - DRIVER_URL, - sparkConf.getExecutorEnv, - driverPod, - Map.empty)).thenReturn(resolvedPod) - resolvedPod + val resolvedContainer = new ContainerBuilder().build() + when(executorBuilder.buildFromFeatures(Matchers.argThat( + new BaseMatcher[KubernetesConf[KubernetesExecutorSpecificConf]] { + override def matches(argument: scala.Any) + : Boolean = { + argument.isInstanceOf[KubernetesConf[KubernetesExecutorSpecificConf]] && + argument.asInstanceOf[KubernetesConf[KubernetesExecutorSpecificConf]] + .roleSpecificConf.executorId == executorId.toString + } + + override def describeTo(description: Description): Unit = {} + }))).thenReturn(SparkPod(resolvedPod, resolvedContainer)) + new PodBuilder(resolvedPod) + .editSpec() + .addToContainers(resolvedContainer) + .endSpec() + .build() } } diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/KubernetesExecutorBuilderSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/KubernetesExecutorBuilderSuite.scala new file mode 100644 index 0000000000000..f5270623f8acc --- /dev/null +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/KubernetesExecutorBuilderSuite.scala @@ -0,0 +1,75 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
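The stubbing pattern used in the scheduler backend suite above, matching on the executor id carried inside the KubernetesConf, can be factored into a helper. A sketch against the same Mockito 1.x and Hamcrest APIs the suite already imports; the helper name is hypothetical:

    import org.hamcrest.{BaseMatcher, Description}
    import org.mockito.Matchers

    // Matches any KubernetesConf whose executor-specific conf carries the
    // expected executor id; mirrors the inline BaseMatcher above.
    def confForExecutor(expectedId: String): KubernetesConf[KubernetesExecutorSpecificConf] =
      Matchers.argThat(new BaseMatcher[KubernetesConf[KubernetesExecutorSpecificConf]] {
        override def matches(argument: scala.Any): Boolean =
          argument.isInstanceOf[KubernetesConf[KubernetesExecutorSpecificConf]] &&
            argument.asInstanceOf[KubernetesConf[KubernetesExecutorSpecificConf]]
              .roleSpecificConf.executorId == expectedId
        override def describeTo(description: Description): Unit =
          description.appendText(s"KubernetesConf for executor $expectedId")
      })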
+ */ +package org.apache.spark.scheduler.cluster.k8s + +import io.fabric8.kubernetes.api.model.PodBuilder + +import org.apache.spark.{SparkConf, SparkFunSuite} +import org.apache.spark.deploy.k8s.{KubernetesConf, KubernetesExecutorSpecificConf, SparkPod} +import org.apache.spark.deploy.k8s.features.{BasicExecutorFeatureStep, KubernetesFeaturesTestUtils, MountSecretsFeatureStep} + +class KubernetesExecutorBuilderSuite extends SparkFunSuite { + private val BASIC_STEP_TYPE = "basic" + private val SECRETS_STEP_TYPE = "mount-secrets" + + private val basicFeatureStep = KubernetesFeaturesTestUtils.getMockConfigStepForStepType( + BASIC_STEP_TYPE, classOf[BasicExecutorFeatureStep]) + private val mountSecretsStep = KubernetesFeaturesTestUtils.getMockConfigStepForStepType( + SECRETS_STEP_TYPE, classOf[MountSecretsFeatureStep]) + + private val builderUnderTest = new KubernetesExecutorBuilder( + _ => basicFeatureStep, + _ => mountSecretsStep) + + test("Basic steps are consistently applied.") { + val conf = KubernetesConf( + new SparkConf(false), + KubernetesExecutorSpecificConf( + "executor-id", new PodBuilder().build()), + "prefix", + "appId", + Map.empty, + Map.empty, + Map.empty, + Map.empty) + validateStepTypesApplied(builderUnderTest.buildFromFeatures(conf), BASIC_STEP_TYPE) + } + + test("Apply secrets step if secrets are present.") { + val conf = KubernetesConf( + new SparkConf(false), + KubernetesExecutorSpecificConf( + "executor-id", new PodBuilder().build()), + "prefix", + "appId", + Map.empty, + Map.empty, + Map("secret" -> "secretMountPath"), + Map.empty) + validateStepTypesApplied( + builderUnderTest.buildFromFeatures(conf), + BASIC_STEP_TYPE, + SECRETS_STEP_TYPE) + } + + private def validateStepTypesApplied(resolvedPod: SparkPod, stepTypes: String*): Unit = { + assert(resolvedPod.pod.getMetadata.getLabels.size === stepTypes.size) + stepTypes.foreach { stepType => + assert(resolvedPod.pod.getMetadata.getLabels.get(stepType) === stepType) + } + } +} diff --git a/resource-managers/mesos/src/main/scala/org/apache/spark/deploy/mesos/MesosClusterDispatcher.scala b/resource-managers/mesos/src/main/scala/org/apache/spark/deploy/mesos/MesosClusterDispatcher.scala index aa378c9d340f1..ccf33e8d4283c 100644 --- a/resource-managers/mesos/src/main/scala/org/apache/spark/deploy/mesos/MesosClusterDispatcher.scala +++ b/resource-managers/mesos/src/main/scala/org/apache/spark/deploy/mesos/MesosClusterDispatcher.scala @@ -19,7 +19,7 @@ package org.apache.spark.deploy.mesos import java.util.concurrent.CountDownLatch -import org.apache.spark.{SecurityManager, SparkConf} +import org.apache.spark.{SecurityManager, SparkConf, SparkException} import org.apache.spark.deploy.mesos.config._ import org.apache.spark.deploy.mesos.ui.MesosClusterUI import org.apache.spark.deploy.rest.mesos.MesosRestServer @@ -100,7 +100,13 @@ private[mesos] object MesosClusterDispatcher Thread.setDefaultUncaughtExceptionHandler(new SparkUncaughtExceptionHandler) Utils.initDaemon(log) val conf = new SparkConf - val dispatcherArgs = new MesosClusterDispatcherArguments(args, conf) + val dispatcherArgs = try { + new MesosClusterDispatcherArguments(args, conf) + } catch { + case e: SparkException => + printErrorAndExit(e.getMessage()) + null + } conf.setMaster(dispatcherArgs.masterUrl) conf.setAppName(dispatcherArgs.name) dispatcherArgs.zookeeperUrl.foreach { z => diff --git a/resource-managers/mesos/src/main/scala/org/apache/spark/deploy/mesos/MesosClusterDispatcherArguments.scala 
b/resource-managers/mesos/src/main/scala/org/apache/spark/deploy/mesos/MesosClusterDispatcherArguments.scala index 096bb4e1af688..267a4283db9e6 100644 --- a/resource-managers/mesos/src/main/scala/org/apache/spark/deploy/mesos/MesosClusterDispatcherArguments.scala +++ b/resource-managers/mesos/src/main/scala/org/apache/spark/deploy/mesos/MesosClusterDispatcherArguments.scala @@ -21,6 +21,7 @@ import scala.annotation.tailrec import scala.collection.mutable import org.apache.spark.SparkConf +import org.apache.spark.deploy.SparkSubmitUtils import org.apache.spark.util.{IntParam, Utils} private[mesos] class MesosClusterDispatcherArguments(args: Array[String], conf: SparkConf) { @@ -95,9 +96,8 @@ private[mesos] class MesosClusterDispatcherArguments(args: Array[String], conf: parse(tail) case ("--conf") :: value :: tail => - val pair = MesosClusterDispatcher. - parseSparkConfProperty(value) - confProperties(pair._1) = pair._2 + val (k, v) = SparkSubmitUtils.parseSparkConfProperty(value) + confProperties(k) = v parse(tail) case ("--help") :: tail => diff --git a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnRMClient.scala b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnRMClient.scala index c1ae12aabb8cc..17234b120ae13 100644 --- a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnRMClient.scala +++ b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnRMClient.scala @@ -29,7 +29,6 @@ import org.apache.spark.{SecurityManager, SparkConf} import org.apache.spark.deploy.yarn.config._ import org.apache.spark.internal.Logging import org.apache.spark.rpc.RpcEndpointRef -import org.apache.spark.util.Utils /** * Handles registering and unregistering the application with the YARN ResourceManager. 
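The dispatcher above now delegates --conf parsing to SparkSubmitUtils.parseSparkConfProperty. The expected contract is a split on the first '=' only, so values may themselves contain '='. A hedged sketch; the real helper's error handling may differ:

    // "spark.executor.extraJavaOptions=-Dx=y" -> ("spark.executor.extraJavaOptions", "-Dx=y")
    def parseSparkConfProperty(pair: String): (String, String) =
      pair.split("=", 2) match {
        case Array(k, v) => (k, v)
        case _ => throw new IllegalArgumentException(s"Spark config without '=': $pair")
      }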
@@ -71,7 +70,8 @@ private[spark] class YarnRMClient extends Logging { logInfo("Registering the ApplicationMaster") synchronized { - amClient.registerApplicationMaster(Utils.localHostName(), 0, trackingUrl) + amClient.registerApplicationMaster(driverRef.address.host, driverRef.address.port, + trackingUrl) registered = true } new YarnAllocator(driverUrl, driverRef, conf, sparkConf, amClient, getAttemptId(), securityMgr, diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/codegen/UTF8StringBuilder.java b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/codegen/UTF8StringBuilder.java index f0f66bae245fd..f8000d78cd1b6 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/codegen/UTF8StringBuilder.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/codegen/UTF8StringBuilder.java @@ -19,6 +19,8 @@ import org.apache.spark.unsafe.Platform; import org.apache.spark.unsafe.array.ByteArrayMethods; +import org.apache.spark.unsafe.memory.ByteArrayMemoryBlock; +import org.apache.spark.unsafe.memory.MemoryBlock; import org.apache.spark.unsafe.types.UTF8String; /** @@ -29,43 +31,34 @@ public class UTF8StringBuilder { private static final int ARRAY_MAX = ByteArrayMethods.MAX_ROUNDED_ARRAY_LENGTH; - private byte[] buffer; - private int cursor = Platform.BYTE_ARRAY_OFFSET; + private ByteArrayMemoryBlock buffer; + private int length = 0; public UTF8StringBuilder() { // Since initial buffer size is 16 in `StringBuilder`, we set the same size here - this.buffer = new byte[16]; + this.buffer = new ByteArrayMemoryBlock(16); } // Grows the buffer by at least `neededSize` private void grow(int neededSize) { - if (neededSize > ARRAY_MAX - totalSize()) { + if (neededSize > ARRAY_MAX - length) { throw new UnsupportedOperationException( "Cannot grow internal buffer by size " + neededSize + " because the size after growing " + "exceeds size limitation " + ARRAY_MAX); } - final int length = totalSize() + neededSize; - if (buffer.length < length) { - int newLength = length < ARRAY_MAX / 2 ? length * 2 : ARRAY_MAX; - final byte[] tmp = new byte[newLength]; - Platform.copyMemory( - buffer, - Platform.BYTE_ARRAY_OFFSET, - tmp, - Platform.BYTE_ARRAY_OFFSET, - totalSize()); + final int requestedSize = length + neededSize; + if (buffer.size() < requestedSize) { + int newLength = requestedSize < ARRAY_MAX / 2 ? 
requestedSize * 2 : ARRAY_MAX; + final ByteArrayMemoryBlock tmp = new ByteArrayMemoryBlock(newLength); + MemoryBlock.copyMemory(buffer, tmp, length); buffer = tmp; } } - private int totalSize() { - return cursor - Platform.BYTE_ARRAY_OFFSET; - } - public void append(UTF8String value) { grow(value.numBytes()); - value.writeToMemory(buffer, cursor); - cursor += value.numBytes(); + value.writeToMemory(buffer.getByteArray(), length + Platform.BYTE_ARRAY_OFFSET); + length += value.numBytes(); } public void append(String value) { @@ -73,6 +66,6 @@ public void append(String value) { } public UTF8String build() { - return UTF8String.fromBytes(buffer, 0, totalSize()); + return UTF8String.fromBytes(buffer.getByteArray(), 0, length); } } diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/codegen/UnsafeWriter.java b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/codegen/UnsafeWriter.java index de0eb6dbb76be..2781655002000 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/codegen/UnsafeWriter.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/codegen/UnsafeWriter.java @@ -16,6 +16,9 @@ */ package org.apache.spark.sql.catalyst.expressions.codegen; +import org.apache.spark.sql.catalyst.expressions.UnsafeArrayData; +import org.apache.spark.sql.catalyst.expressions.UnsafeMapData; +import org.apache.spark.sql.catalyst.expressions.UnsafeRow; import org.apache.spark.sql.types.Decimal; import org.apache.spark.unsafe.Platform; import org.apache.spark.unsafe.array.ByteArrayMethods; @@ -103,21 +106,7 @@ protected final void zeroOutPaddingBytes(int numBytes) { public abstract void write(int ordinal, Decimal input, int precision, int scale); public final void write(int ordinal, UTF8String input) { - final int numBytes = input.numBytes(); - final int roundedSize = ByteArrayMethods.roundNumberOfBytesToNearestWord(numBytes); - - // grow the global buffer before writing data. - grow(roundedSize); - - zeroOutPaddingBytes(numBytes); - - // Write the bytes to the variable length portion. - input.writeToMemory(getBuffer(), cursor()); - - setOffsetAndSize(ordinal, numBytes); - - // move the cursor forward. - increaseCursor(roundedSize); + writeUnalignedBytes(ordinal, input.getBaseObject(), input.getBaseOffset(), input.numBytes()); } public final void write(int ordinal, byte[] input) { @@ -125,20 +114,19 @@ public final void write(int ordinal, byte[] input) { } public final void write(int ordinal, byte[] input, int offset, int numBytes) { - final int roundedSize = ByteArrayMethods.roundNumberOfBytesToNearestWord(input.length); + writeUnalignedBytes(ordinal, input, Platform.BYTE_ARRAY_OFFSET + offset, numBytes); + } - // grow the global buffer before writing data. + private void writeUnalignedBytes( + int ordinal, + Object baseObject, + long baseOffset, + int numBytes) { + final int roundedSize = ByteArrayMethods.roundNumberOfBytesToNearestWord(numBytes); grow(roundedSize); - zeroOutPaddingBytes(numBytes); - - // Write the bytes to the variable length portion. - Platform.copyMemory( - input, Platform.BYTE_ARRAY_OFFSET + offset, getBuffer(), cursor(), numBytes); - + Platform.copyMemory(baseObject, baseOffset, getBuffer(), cursor(), numBytes); setOffsetAndSize(ordinal, numBytes); - - // move the cursor forward. 
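The grow logic in UTF8StringBuilder above doubles the requested capacity until doubling would cross half of the array limit, then clamps to the limit. Sketched in isolation, with ARRAY_MAX standing in for ByteArrayMethods.MAX_ROUNDED_ARRAY_LENGTH:

    // assumption: the rounded-array limit sits just under Int.MaxValue
    val ARRAY_MAX = Int.MaxValue - 15

    def newCapacity(requestedSize: Int): Int =
      if (requestedSize < ARRAY_MAX / 2) requestedSize * 2 else ARRAY_MAX

    assert(newCapacity(20) == 40)  // small appends keep doubling headroom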
increaseCursor(roundedSize); } @@ -156,6 +144,40 @@ public final void write(int ordinal, CalendarInterval input) { increaseCursor(16); } + public final void write(int ordinal, UnsafeRow row) { + writeAlignedBytes(ordinal, row.getBaseObject(), row.getBaseOffset(), row.getSizeInBytes()); + } + + public final void write(int ordinal, UnsafeMapData map) { + writeAlignedBytes(ordinal, map.getBaseObject(), map.getBaseOffset(), map.getSizeInBytes()); + } + + public final void write(UnsafeArrayData array) { + // Unsafe arrays can be written both as a regular array field and as part of a map. This makes + // updating the offset and size dependent on the code path, which is why we currently do not + // provide a method for writing unsafe arrays that also updates the size and offset. + int numBytes = array.getSizeInBytes(); + grow(numBytes); + Platform.copyMemory( + array.getBaseObject(), + array.getBaseOffset(), + getBuffer(), + cursor(), + numBytes); + increaseCursor(numBytes); + } + + private void writeAlignedBytes( + int ordinal, + Object baseObject, + long baseOffset, + int numBytes) { + grow(numBytes); + Platform.copyMemory(baseObject, baseOffset, getBuffer(), cursor(), numBytes); + setOffsetAndSize(ordinal, numBytes); + increaseCursor(numBytes); + } + protected final void writeBoolean(long offset, boolean value) { Platform.putBoolean(getBuffer(), offset, value); } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/InternalRow.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/InternalRow.scala index 29110640d64f2..274d75e680f03 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/InternalRow.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/InternalRow.scala @@ -19,7 +19,7 @@ package org.apache.spark.sql.catalyst import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.util.{ArrayData, MapData} -import org.apache.spark.sql.types.{DataType, Decimal, StructType} +import org.apache.spark.sql.types._ import org.apache.spark.unsafe.types.UTF8String /** @@ -119,4 +119,28 @@ object InternalRow { case v: MapData => v.copy() case _ => value } + + /** + * Returns an accessor for an `InternalRow` with the given data type. The returned accessor + * actually takes a `SpecializedGetters` input because it can be generalized to other classes + * that implement `SpecializedGetters` (e.g., `ArrayData`) too.
+ */ + def getAccessor(dataType: DataType): (SpecializedGetters, Int) => Any = dataType match { + case BooleanType => (input, ordinal) => input.getBoolean(ordinal) + case ByteType => (input, ordinal) => input.getByte(ordinal) + case ShortType => (input, ordinal) => input.getShort(ordinal) + case IntegerType | DateType => (input, ordinal) => input.getInt(ordinal) + case LongType | TimestampType => (input, ordinal) => input.getLong(ordinal) + case FloatType => (input, ordinal) => input.getFloat(ordinal) + case DoubleType => (input, ordinal) => input.getDouble(ordinal) + case StringType => (input, ordinal) => input.getUTF8String(ordinal) + case BinaryType => (input, ordinal) => input.getBinary(ordinal) + case CalendarIntervalType => (input, ordinal) => input.getInterval(ordinal) + case t: DecimalType => (input, ordinal) => input.getDecimal(ordinal, t.precision, t.scale) + case t: StructType => (input, ordinal) => input.getStruct(ordinal, t.size) + case _: ArrayType => (input, ordinal) => input.getArray(ordinal) + case _: MapType => (input, ordinal) => input.getMap(ordinal) + case u: UserDefinedType[_] => getAccessor(u.sqlType) + case _ => (input, ordinal) => input.get(ordinal, dataType) + } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala index e672b9f7063c1..1ae956a0dba0e 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala @@ -395,6 +395,7 @@ object FunctionRegistry { expression[TruncTimestamp]("date_trunc"), expression[UnixTimestamp]("unix_timestamp"), expression[DayOfWeek]("dayofweek"), + expression[WeekDay]("weekday"), expression[WeekOfYear]("weekofyear"), expression[Year]("year"), expression[TimeWindow]("window"), @@ -409,6 +410,8 @@ object FunctionRegistry { expression[MapValues]("map_values"), expression[Size]("size"), expression[SortArray]("sort_array"), + expression[ArrayMin]("array_min"), + expression[ArrayMax]("array_max"), CreateStruct.registryEntry, // misc functions diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/UnsupportedOperationChecker.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/UnsupportedOperationChecker.scala index b55043c270644..ff9d6d7a7dded 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/UnsupportedOperationChecker.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/UnsupportedOperationChecker.scala @@ -345,7 +345,7 @@ object UnsupportedOperationChecker { plan.foreachUp { implicit subPlan => subPlan match { case (_: Project | _: Filter | _: MapElements | _: MapPartitions | - _: DeserializeToObject | _: SerializeFromObject) => + _: DeserializeToObject | _: SerializeFromObject | _: SubqueryAlias) => case node if node.nodeName == "StreamingRelationV2" => case node => throwError(s"Continuous processing does not support ${node.nodeName} operations.") diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala index a65f58fa61ff4..71e23175168e2 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala @@ -21,7 +21,7 
@@ import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.{FunctionIdentifier, InternalRow, TableIdentifier} import org.apache.spark.sql.catalyst.errors.TreeNodeException import org.apache.spark.sql.catalyst.expressions._ -import org.apache.spark.sql.catalyst.expressions.codegen.{CodegenContext, CodegenFallback, ExprCode} +import org.apache.spark.sql.catalyst.expressions.codegen.{CodegenContext, ExprCode} import org.apache.spark.sql.catalyst.parser.ParserUtils import org.apache.spark.sql.catalyst.plans.logical.{LeafNode, LogicalPlan, UnaryNode} import org.apache.spark.sql.catalyst.trees.TreeNode @@ -335,7 +335,7 @@ case class UnresolvedRegex(regexPattern: String, table: Option[String], caseSens * @param names the names to be associated with each output of computing [[child]]. */ case class MultiAlias(child: Expression, names: Seq[String]) - extends UnaryExpression with NamedExpression with CodegenFallback { + extends UnaryExpression with NamedExpression with Unevaluable { override def name: String = throw new UnresolvedException(this, "name") diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala index 52ed89ef8d781..c390337c03ff5 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala @@ -286,7 +286,10 @@ class SessionCatalog( * Create a metastore table in the database specified in `tableDefinition`. * If no such database is specified, create it in the current database. */ - def createTable(tableDefinition: CatalogTable, ignoreIfExists: Boolean): Unit = { + def createTable( + tableDefinition: CatalogTable, + ignoreIfExists: Boolean, + validateLocation: Boolean = true): Unit = { val db = formatDatabaseName(tableDefinition.identifier.database.getOrElse(getCurrentDatabase)) val table = formatTableName(tableDefinition.identifier.table) val tableIdentifier = TableIdentifier(table, Some(db)) @@ -305,7 +308,11 @@ class SessionCatalog( } requireDbExists(db) - if (!ignoreIfExists) { + if (tableExists(newTableDefinition.identifier)) { + if (!ignoreIfExists) { + throw new TableAlreadyExistsException(db = db, table = table) + } + } else if (validateLocation) { validateTableLocation(newTableDefinition) } externalCatalog.createTable(newTableDefinition, ignoreIfExists) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/BoundAttribute.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/BoundAttribute.scala index 5021a567592e0..4cc84b27d9eb0 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/BoundAttribute.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/BoundAttribute.scala @@ -33,28 +33,14 @@ case class BoundReference(ordinal: Int, dataType: DataType, nullable: Boolean) override def toString: String = s"input[$ordinal, ${dataType.simpleString}, $nullable]" + private val accessor: (InternalRow, Int) => Any = InternalRow.getAccessor(dataType) + // Use special getter for primitive types (for UnsafeRow) override def eval(input: InternalRow): Any = { - if (input.isNullAt(ordinal)) { + if (nullable && input.isNullAt(ordinal)) { null } else { - dataType match { - case BooleanType => input.getBoolean(ordinal) - case ByteType => input.getByte(ordinal) - case ShortType => input.getShort(ordinal) - 
case IntegerType | DateType => input.getInt(ordinal) - case LongType | TimestampType => input.getLong(ordinal) - case FloatType => input.getFloat(ordinal) - case DoubleType => input.getDouble(ordinal) - case StringType => input.getUTF8String(ordinal) - case BinaryType => input.getBinary(ordinal) - case CalendarIntervalType => input.getInterval(ordinal) - case t: DecimalType => input.getDecimal(ordinal, t.precision, t.scale) - case t: StructType => input.getStruct(ordinal, t.size) - case _: ArrayType => input.getArray(ordinal) - case _: MapType => input.getMap(ordinal) - case _ => input.get(ordinal, dataType) - } + accessor(input, ordinal) } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala index 7a5e49cb5206b..97dff6ae88299 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala @@ -104,9 +104,9 @@ abstract class Expression extends TreeNode[Expression] { }.getOrElse { val isNull = ctx.freshName("isNull") val value = ctx.freshName("value") - val eval = doGenCode(ctx, ExprCode("", - VariableValue(isNull, CodeGenerator.JAVA_BOOLEAN), - VariableValue(value, CodeGenerator.javaType(dataType)))) + val eval = doGenCode(ctx, ExprCode( + JavaCode.isNullVariable(isNull), + JavaCode.variable(value, dataType))) reduceCodeSize(ctx, eval) if (eval.code.nonEmpty) { // Add `this` in the comment. @@ -123,7 +123,7 @@ abstract class Expression extends TreeNode[Expression] { val setIsNull = if (!eval.isNull.isInstanceOf[LiteralValue]) { val globalIsNull = ctx.addMutableState(CodeGenerator.JAVA_BOOLEAN, "globalIsNull") val localIsNull = eval.isNull - eval.isNull = GlobalValue(globalIsNull, CodeGenerator.JAVA_BOOLEAN) + eval.isNull = JavaCode.isNullGlobal(globalIsNull) s"$globalIsNull = $localIsNull;" } else { "" @@ -142,7 +142,7 @@ abstract class Expression extends TreeNode[Expression] { |} """.stripMargin) - eval.value = VariableValue(newValue, javaType) + eval.value = JavaCode.variable(newValue, dataType) eval.code = s"$javaType $newValue = $funcFullName(${ctx.INPUT_ROW});" } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/InterpretedUnsafeProjection.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/InterpretedUnsafeProjection.scala index b31466f5c92d1..6d69d69b1c802 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/InterpretedUnsafeProjection.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/InterpretedUnsafeProjection.scala @@ -173,21 +173,17 @@ object InterpretedUnsafeProjection extends UnsafeProjectionCreator { val rowWriter = new UnsafeRowWriter(writer, numFields) val structWriter = generateStructWriter(rowWriter, fields) (v, i) => { - val previousCursor = writer.cursor() v.getStruct(i, fields.length) match { case row: UnsafeRow => - writeUnsafeData( - rowWriter, - row.getBaseObject, - row.getBaseOffset, - row.getSizeInBytes) + writer.write(i, row) case row => + val previousCursor = writer.cursor() // Nested struct. We don't know where this will start because a row can be // variable length, so we need to update the offsets and zero out the bit mask. 
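Since InternalRow.getAccessor (added earlier in this diff) returns a plain Scala function value, the cached-accessor pattern BoundReference now uses can be exercised directly; a small usage sketch:

    import org.apache.spark.sql.catalyst.InternalRow
    import org.apache.spark.sql.types.IntegerType

    // The accessor is resolved once per data type, so eval() no longer
    // pattern-matches on the type for every row.
    val getInt = InternalRow.getAccessor(IntegerType)
    val row = InternalRow(42)
    assert(getInt(row, 0) == 42)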
rowWriter.resetRowWriter() structWriter.apply(row) + writer.setOffsetAndSizeFromPreviousCursor(i, previousCursor) } - writer.setOffsetAndSizeFromPreviousCursor(i, previousCursor) } case ArrayType(elementType, containsNull) => @@ -214,15 +210,12 @@ object InterpretedUnsafeProjection extends UnsafeProjectionCreator { valueType, valueContainsNull) (v, i) => { - val previousCursor = writer.cursor() v.getMap(i) match { case map: UnsafeMapData => - writeUnsafeData( - valueArrayWriter, - map.getBaseObject, - map.getBaseOffset, - map.getSizeInBytes) + writer.write(i, map) case map => + val previousCursor = writer.cursor() + // preserve 8 bytes to write the key array numBytes later. valueArrayWriter.grow(8) valueArrayWriter.increaseCursor(8) @@ -237,8 +230,8 @@ object InterpretedUnsafeProjection extends UnsafeProjectionCreator { // Write the values. writeArray(valueArrayWriter, valueWriter, map.valueArray()) + writer.setOffsetAndSizeFromPreviousCursor(i, previousCursor) } - writer.setOffsetAndSizeFromPreviousCursor(i, previousCursor) } case udt: UserDefinedType[_] => @@ -318,11 +311,7 @@ object InterpretedUnsafeProjection extends UnsafeProjectionCreator { elementWriter: (SpecializedGetters, Int) => Unit, array: ArrayData): Unit = array match { case unsafe: UnsafeArrayData => - writeUnsafeData( - arrayWriter, - unsafe.getBaseObject, - unsafe.getBaseOffset, - unsafe.getSizeInBytes) + arrayWriter.write(unsafe) case _ => val numElements = array.numElements() arrayWriter.initialize(numElements) @@ -332,23 +321,4 @@ object InterpretedUnsafeProjection extends UnsafeProjectionCreator { i += 1 } } - - /** - * Write an opaque block of data to the buffer. This is used to copy - * [[UnsafeRow]], [[UnsafeArrayData]] and [[UnsafeMapData]] objects. - */ - private def writeUnsafeData( - writer: UnsafeWriter, - baseObject: AnyRef, - baseOffset: Long, - sizeInBytes: Int) : Unit = { - writer.grow(sizeInBytes) - Platform.copyMemory( - baseObject, - baseOffset, - writer.getBuffer, - writer.cursor, - sizeInBytes) - writer.increaseCursor(sizeInBytes) - } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/arithmetic.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/arithmetic.scala index defd6f3cd8849..d4e322d23b95b 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/arithmetic.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/arithmetic.scala @@ -591,16 +591,11 @@ case class Least(children: Seq[Expression]) extends Expression { override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { val evalChildren = children.map(_.genCode(ctx)) - ev.isNull = GlobalValue(ctx.addMutableState(CodeGenerator.JAVA_BOOLEAN, ev.isNull), - CodeGenerator.JAVA_BOOLEAN) + ev.isNull = JavaCode.isNullGlobal(ctx.addMutableState(CodeGenerator.JAVA_BOOLEAN, ev.isNull)) val evals = evalChildren.map(eval => s""" |${eval.code} - |if (!${eval.isNull} && (${ev.isNull} || - | ${ctx.genGreater(dataType, ev.value, eval.value)})) { - | ${ev.isNull} = false; - | ${ev.value} = ${eval.value}; - |} + |${ctx.reassignIfSmaller(dataType, ev, eval)} """.stripMargin ) @@ -671,16 +666,11 @@ case class Greatest(children: Seq[Expression]) extends Expression { override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { val evalChildren = children.map(_.genCode(ctx)) - ev.isNull = GlobalValue(ctx.addMutableState(CodeGenerator.JAVA_BOOLEAN, ev.isNull), - CodeGenerator.JAVA_BOOLEAN) + ev.isNull = 
JavaCode.isNullGlobal(ctx.addMutableState(CodeGenerator.JAVA_BOOLEAN, ev.isNull)) val evals = evalChildren.map(eval => s""" |${eval.code} - |if (!${eval.isNull} && (${ev.isNull} || - | ${ctx.genGreater(dataType, eval.value, ev.value)})) { - | ${ev.isNull} = false; - | ${ev.value} = ${eval.value}; - |} + |${ctx.reassignIfGreater(dataType, ev, eval)} """.stripMargin ) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala index c9c60ef1be640..d97611c98ac91 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala @@ -59,10 +59,12 @@ import org.apache.spark.util.{ParentClassLoader, Utils} case class ExprCode(var code: String, var isNull: ExprValue, var value: ExprValue) object ExprCode { + def apply(isNull: ExprValue, value: ExprValue): ExprCode = { + ExprCode(code = "", isNull, value) + } + def forNullValue(dataType: DataType): ExprCode = { - val defaultValueLiteral = CodeGenerator.defaultValue(dataType, typedNull = true) - ExprCode(code = "", isNull = TrueLiteral, - value = LiteralValue(defaultValueLiteral, CodeGenerator.javaType(dataType))) + ExprCode(code = "", isNull = TrueLiteral, JavaCode.defaultLiteral(dataType)) } def forNonNullValue(value: ExprValue): ExprCode = { @@ -331,7 +333,7 @@ class CodegenContext { case _: StructType | _: ArrayType | _: MapType => s"$value = $initCode.copy();" case _ => s"$value = $initCode;" } - ExprCode(code, FalseLiteral, GlobalValue(value, javaType(dataType))) + ExprCode(code, FalseLiteral, JavaCode.global(value, dataType)) } def declareMutableStates(): String = { @@ -697,6 +699,40 @@ class CodegenContext { case _ => s"(${genComp(dataType, c1, c2)}) > 0" } + /** + * Generates code for updating `partialResult` if `item` is smaller than it. + * + * @param dataType data type of the expressions + * @param partialResult `ExprCode` representing the partial result which has to be updated + * @param item `ExprCode` representing the new expression to evaluate for the result + */ + def reassignIfSmaller(dataType: DataType, partialResult: ExprCode, item: ExprCode): String = { + s""" + |if (!${item.isNull} && (${partialResult.isNull} || + | ${genGreater(dataType, partialResult.value, item.value)})) { + | ${partialResult.isNull} = false; + | ${partialResult.value} = ${item.value}; + |} + """.stripMargin + } + + /** + * Generates code for updating `partialResult` if `item` is greater than it. + * + * @param dataType data type of the expressions + * @param partialResult `ExprCode` representing the partial result which has to be updated + * @param item `ExprCode` representing the new expression to evaluate for the result + */ + def reassignIfGreater(dataType: DataType, partialResult: ExprCode, item: ExprCode): String = { + s""" + |if (!${item.isNull} && (${partialResult.isNull} || + | ${genGreater(dataType, item.value, partialResult.value)})) { + | ${partialResult.isNull} = false; + | ${partialResult.value} = ${item.value}; + |} + """.stripMargin + } + /** * Generates code to do null safe execution, i.e. only execute the code when the input is not * null by adding null check if necessary. @@ -1004,8 +1040,9 @@ class CodegenContext { // at least two nodes) as the cost of doing it is expected to be low. 
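For an int-typed child of Greatest, the reassignIfGreater helper above expands to Java of roughly this shape; the identifiers stand in for generated variable names:

    val emitted =
      """
        |if (!item_isNull && (result_isNull ||
        |  item_value > result_value)) {
        |  result_isNull = false;
        |  result_value = item_value;
        |}
      """.stripMargin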
subexprFunctions += s"${addNewFunction(fnName, fn)}($INPUT_ROW);" - val state = SubExprEliminationState(GlobalValue(isNull, JAVA_BOOLEAN), - GlobalValue(value, javaType(expr.dataType))) + val state = SubExprEliminationState( + JavaCode.isNullGlobal(isNull), + JavaCode.global(value, expr.dataType)) subExprEliminationExprs ++= e.map(_ -> state).toMap } } @@ -1479,6 +1516,26 @@ object CodeGenerator extends Logging { case _ => "Object" } + def javaClass(dt: DataType): Class[_] = dt match { + case BooleanType => java.lang.Boolean.TYPE + case ByteType => java.lang.Byte.TYPE + case ShortType => java.lang.Short.TYPE + case IntegerType | DateType => java.lang.Integer.TYPE + case LongType | TimestampType => java.lang.Long.TYPE + case FloatType => java.lang.Float.TYPE + case DoubleType => java.lang.Double.TYPE + case _: DecimalType => classOf[Decimal] + case BinaryType => classOf[Array[Byte]] + case StringType => classOf[UTF8String] + case CalendarIntervalType => classOf[CalendarInterval] + case _: StructType => classOf[InternalRow] + case _: ArrayType => classOf[ArrayData] + case _: MapType => classOf[MapData] + case udt: UserDefinedType[_] => javaClass(udt.sqlType) + case ObjectType(cls) => cls + case _ => classOf[Object] + } + /** * Returns the boxed type in Java. */ diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/ExprValue.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/ExprValue.scala deleted file mode 100644 index df5f1c58b1b2d..0000000000000 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/ExprValue.scala +++ /dev/null @@ -1,76 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.sql.catalyst.expressions.codegen - -import scala.language.implicitConversions - -import org.apache.spark.sql.types.DataType - -// An abstraction that represents the evaluation result of [[ExprCode]]. -abstract class ExprValue { - - val javaType: String - - // Whether we can directly access the evaluation value anywhere. - // For example, a variable created outside a method can not be accessed inside the method. - // For such cases, we may need to pass the evaluation as parameter. - val canDirectAccess: Boolean - - def isPrimitive: Boolean = CodeGenerator.isPrimitiveType(javaType) -} - -object ExprValue { - implicit def exprValueToString(exprValue: ExprValue): String = exprValue.toString -} - -// A literal evaluation of [[ExprCode]]. 
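The javaClass helper added to CodeGenerator above maps a Catalyst DataType to the JVM class codegen reflects over, unwrapping UDTs along the way. A quick usage check:

    import org.apache.spark.sql.catalyst.expressions.codegen.CodeGenerator
    import org.apache.spark.sql.types.{IntegerType, StringType}
    import org.apache.spark.unsafe.types.UTF8String

    assert(CodeGenerator.javaClass(IntegerType) == java.lang.Integer.TYPE)  // primitive int
    assert(CodeGenerator.javaClass(StringType) == classOf[UTF8String])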
-class LiteralValue(val value: String, val javaType: String) extends ExprValue { - override def toString: String = value - override val canDirectAccess: Boolean = true -} - -object LiteralValue { - def apply(value: String, javaType: String): LiteralValue = new LiteralValue(value, javaType) - def unapply(literal: LiteralValue): Option[(String, String)] = - Some((literal.value, literal.javaType)) -} - -// A variable evaluation of [[ExprCode]]. -case class VariableValue( - val variableName: String, - val javaType: String) extends ExprValue { - override def toString: String = variableName - override val canDirectAccess: Boolean = false -} - -// A statement evaluation of [[ExprCode]]. -case class StatementValue( - val statement: String, - val javaType: String, - val canDirectAccess: Boolean = false) extends ExprValue { - override def toString: String = statement -} - -// A global variable evaluation of [[ExprCode]]. -case class GlobalValue(val value: String, val javaType: String) extends ExprValue { - override def toString: String = value - override val canDirectAccess: Boolean = true -} - -case object TrueLiteral extends LiteralValue("true", "boolean") -case object FalseLiteral extends LiteralValue("false", "boolean") diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateMutableProjection.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateMutableProjection.scala index 3ae0b54c754cf..33d14329ec95c 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateMutableProjection.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateMutableProjection.scala @@ -52,43 +52,45 @@ object GenerateMutableProjection extends CodeGenerator[Seq[Expression], MutableP expressions: Seq[Expression], useSubexprElimination: Boolean): MutableProjection = { val ctx = newCodeGenContext() - val (validExpr, index) = expressions.zipWithIndex.filter { + val validExpr = expressions.zipWithIndex.filter { case (NoOp, _) => false case _ => true - }.unzip - val exprVals = ctx.generateExpressions(validExpr, useSubexprElimination) + } + val exprVals = ctx.generateExpressions(validExpr.map(_._1), useSubexprElimination) - // 4-tuples: (code for projection, isNull variable name, value variable name, column index) + // 2-tuples: (code to evaluate the expression, code to update the target column) - val projectionCodes: Seq[(String, ExprValue, String, Int)] = exprVals.zip(index).map { - case (ev, i) => - val e = expressions(i) - val value = ctx.addMutableState(CodeGenerator.javaType(e.dataType), "value") - if (e.nullable) { + val projectionCodes: Seq[(String, String)] = validExpr.zip(exprVals).map { + case ((e, i), ev) => + val value = JavaCode.global( + ctx.addMutableState(CodeGenerator.javaType(e.dataType), "value"), + e.dataType) + val (code, isNull) = if (e.nullable) { + val isNull = ctx.addMutableState(CodeGenerator.JAVA_BOOLEAN, "isNull") (s""" |${ev.code} |$isNull = ${ev.isNull}; |$value = ${ev.value}; - """.stripMargin, GlobalValue(isNull, CodeGenerator.JAVA_BOOLEAN), value, i) + """.stripMargin, JavaCode.isNullGlobal(isNull)) } else { (s""" |${ev.code} |$value = ${ev.value}; - """.stripMargin, ev.isNull, value, i) + """.stripMargin, FalseLiteral) } + val update = CodeGenerator.updateColumn( + "mutableRow", + e.dataType, + i, + ExprCode(isNull, value), + e.nullable) + (code, update) } // Evaluate all the subexpressions.
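The JavaCode factories threaded through these hunks replace direct construction of the deleted ExprValue subclasses. Inferred from the call sites in this diff rather than from the new file itself, the surface looks roughly like this:

    import org.apache.spark.sql.catalyst.expressions.codegen.{ExprCode, JavaCode}
    import org.apache.spark.sql.types.IntegerType

    val isNull = JavaCode.isNullVariable("isNull_0")        // boolean local flag
    val global = JavaCode.isNullGlobal("globalIsNull")      // boolean mutable-state flag
    val value  = JavaCode.variable("value_0", IntegerType)  // value typed from the DataType
    val ev = ExprCode(isNull, value)                        // two-argument apply added in this diff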
val evalSubexpr = ctx.subexprFunctions.mkString("\n") - val updates = validExpr.zip(projectionCodes).map { - case (e, (_, isNull, value, i)) => - val ev = ExprCode("", isNull, GlobalValue(value, CodeGenerator.javaType(e.dataType))) - CodeGenerator.updateColumn("mutableRow", e.dataType, i, ev, e.nullable) - } - val allProjections = ctx.splitExpressionsWithCurrentInputs(projectionCodes.map(_._1)) - val allUpdates = ctx.splitExpressionsWithCurrentInputs(updates) + val allUpdates = ctx.splitExpressionsWithCurrentInputs(projectionCodes.map(_._2)) val codeBody = s""" public java.lang.Object generate(Object[] references) { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateSafeProjection.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateSafeProjection.scala index a30a0b22cd305..01c350e9dbf69 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateSafeProjection.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateSafeProjection.scala @@ -19,9 +19,10 @@ package org.apache.spark.sql.catalyst.expressions.codegen import scala.annotation.tailrec +import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.expressions.aggregate.NoOp -import org.apache.spark.sql.catalyst.util.{ArrayBasedMapData, GenericArrayData} +import org.apache.spark.sql.catalyst.util.{ArrayBasedMapData, ArrayData, GenericArrayData, MapData} import org.apache.spark.sql.types._ /** @@ -53,9 +54,10 @@ object GenerateSafeProjection extends CodeGenerator[Seq[Expression], Projection] val rowClass = classOf[GenericInternalRow].getName val fieldWriters = schema.map(_.dataType).zipWithIndex.map { case (dt, i) => - val converter = convertToSafe(ctx, - StatementValue(CodeGenerator.getValue(tmpInput, dt, i.toString), - CodeGenerator.javaType(dt)), dt) + val converter = convertToSafe( + ctx, + JavaCode.expression(CodeGenerator.getValue(tmpInput, dt, i.toString), dt), + dt) s""" if (!$tmpInput.isNullAt($i)) { ${converter.code} @@ -76,7 +78,7 @@ object GenerateSafeProjection extends CodeGenerator[Seq[Expression], Projection] |final InternalRow $output = new $rowClass($values); """.stripMargin - ExprCode(code, FalseLiteral, VariableValue(output, "InternalRow")) + ExprCode(code, FalseLiteral, JavaCode.variable(output, classOf[InternalRow])) } private def createCodeForArray( @@ -91,9 +93,10 @@ object GenerateSafeProjection extends CodeGenerator[Seq[Expression], Projection] val index = ctx.freshName("index") val arrayClass = classOf[GenericArrayData].getName - val elementConverter = convertToSafe(ctx, - StatementValue(CodeGenerator.getValue(tmpInput, elementType, index), - CodeGenerator.javaType(elementType)), elementType) + val elementConverter = convertToSafe( + ctx, + JavaCode.expression(CodeGenerator.getValue(tmpInput, elementType, index), elementType), + elementType) val code = s""" final ArrayData $tmpInput = $input; final int $numElements = $tmpInput.numElements(); @@ -107,7 +110,7 @@ object GenerateSafeProjection extends CodeGenerator[Seq[Expression], Projection] final ArrayData $output = new $arrayClass($values); """ - ExprCode(code, FalseLiteral, VariableValue(output, "ArrayData")) + ExprCode(code, FalseLiteral, JavaCode.variable(output, classOf[ArrayData])) } private def createCodeForMap( @@ -128,7 +131,7 @@ object GenerateSafeProjection extends CodeGenerator[Seq[Expression], Projection] 
final MapData $output = new $mapClass(${keyConverter.value}, ${valueConverter.value}); """ - ExprCode(code, FalseLiteral, VariableValue(output, "MapData")) + ExprCode(code, FalseLiteral, JavaCode.variable(output, classOf[MapData])) } @tailrec @@ -140,7 +143,7 @@ object GenerateSafeProjection extends CodeGenerator[Seq[Expression], Projection] case ArrayType(elementType, _) => createCodeForArray(ctx, input, elementType) case MapType(keyType, valueType, _) => createCodeForMap(ctx, input, keyType, valueType) case udt: UserDefinedType[_] => convertToSafe(ctx, input, udt.sqlType) - case _ => ExprCode("", FalseLiteral, input) + case _ => ExprCode(FalseLiteral, input) } protected def create(expressions: Seq[Expression]): Projection = { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateUnsafeProjection.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateUnsafeProjection.scala index 4a4d76313a543..01b4d6c4529bd 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateUnsafeProjection.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateUnsafeProjection.scala @@ -32,14 +32,13 @@ import org.apache.spark.sql.types._ object GenerateUnsafeProjection extends CodeGenerator[Seq[Expression], UnsafeProjection] { /** Returns true iff we support this data type. */ - def canSupport(dataType: DataType): Boolean = dataType match { + def canSupport(dataType: DataType): Boolean = UserDefinedType.sqlType(dataType) match { case NullType => true - case t: AtomicType => true + case _: AtomicType => true case _: CalendarIntervalType => true case t: StructType => t.forall(field => canSupport(field.dataType)) case t: ArrayType if canSupport(t.elementType) => true case MapType(kt, vt, _) if canSupport(kt) && canSupport(vt) => true - case udt: UserDefinedType[_] => canSupport(udt.sqlType) case _ => false } @@ -47,28 +46,33 @@ object GenerateUnsafeProjection extends CodeGenerator[Seq[Expression], UnsafePro private def writeStructToBuffer( ctx: CodegenContext, input: String, + index: String, fieldTypes: Seq[DataType], rowWriter: String): String = { // Puts `input` in a local variable to avoid to re-evaluate it if it's a statement. val tmpInput = ctx.freshName("tmpInput") val fieldEvals = fieldTypes.zipWithIndex.map { case (dt, i) => - ExprCode("", StatementValue(s"$tmpInput.isNullAt($i)", CodeGenerator.JAVA_BOOLEAN), - StatementValue(CodeGenerator.getValue(tmpInput, dt, i.toString), - CodeGenerator.javaType(dt))) + ExprCode( + JavaCode.isNullExpression(s"$tmpInput.isNullAt($i)"), + JavaCode.expression(CodeGenerator.getValue(tmpInput, dt, i.toString), dt)) } val rowWriterClass = classOf[UnsafeRowWriter].getName val structRowWriter = ctx.addMutableState(rowWriterClass, "rowWriter", v => s"$v = new $rowWriterClass($rowWriter, ${fieldEvals.length});") - + val previousCursor = ctx.freshName("previousCursor") s""" - final InternalRow $tmpInput = $input; - if ($tmpInput instanceof UnsafeRow) { - ${writeUnsafeData(ctx, s"((UnsafeRow) $tmpInput)", structRowWriter)} - } else { - ${writeExpressionsToBuffer(ctx, tmpInput, fieldEvals, fieldTypes, structRowWriter)} - } - """ + |final InternalRow $tmpInput = $input; + |if ($tmpInput instanceof UnsafeRow) { + | $rowWriter.write($index, (UnsafeRow) $tmpInput); + |} else { + | // Remember the current cursor so that we can calculate how many bytes are + | // written later. 
+ | final int $previousCursor = $rowWriter.cursor(); + | ${writeExpressionsToBuffer(ctx, tmpInput, fieldEvals, fieldTypes, structRowWriter)} + | $rowWriter.setOffsetAndSizeFromPreviousCursor($index, $previousCursor); + |} + """.stripMargin } private def writeExpressionsToBuffer( @@ -95,10 +99,7 @@ object GenerateUnsafeProjection extends CodeGenerator[Seq[Expression], UnsafePro val writeFields = inputs.zip(inputTypes).zipWithIndex.map { case ((input, dataType), index) => - val dt = dataType match { - case udt: UserDefinedType[_] => udt.sqlType - case other => other - } + val dt = UserDefinedType.sqlType(dataType) val setNull = dt match { case t: DecimalType if t.precision > Decimal.MAX_LONG_DIGITS => @@ -106,58 +107,22 @@ object GenerateUnsafeProjection extends CodeGenerator[Seq[Expression], UnsafePro s"$rowWriter.write($index, (Decimal) null, ${t.precision}, ${t.scale});" case _ => s"$rowWriter.setNullAt($index);" } - val previousCursor = ctx.freshName("previousCursor") - - val writeField = dt match { - case t: StructType => - s""" - // Remember the current cursor so that we can calculate how many bytes are - // written later. - final int $previousCursor = $rowWriter.cursor(); - ${writeStructToBuffer(ctx, input.value, t.map(_.dataType), rowWriter)} - $rowWriter.setOffsetAndSizeFromPreviousCursor($index, $previousCursor); - """ - - case a @ ArrayType(et, _) => - s""" - // Remember the current cursor so that we can calculate how many bytes are - // written later. - final int $previousCursor = $rowWriter.cursor(); - ${writeArrayToBuffer(ctx, input.value, et, rowWriter)} - $rowWriter.setOffsetAndSizeFromPreviousCursor($index, $previousCursor); - """ - - case m @ MapType(kt, vt, _) => - s""" - // Remember the current cursor so that we can calculate how many bytes are - // written later. - final int $previousCursor = $rowWriter.cursor(); - ${writeMapToBuffer(ctx, input.value, kt, vt, rowWriter)} - $rowWriter.setOffsetAndSizeFromPreviousCursor($index, $previousCursor); - """ - - case t: DecimalType => - s"$rowWriter.write($index, ${input.value}, ${t.precision}, ${t.scale});" - - case NullType => "" - - case _ => s"$rowWriter.write($index, ${input.value});" - } - if (input.isNull == "false") { + val writeField = writeElement(ctx, input.value, index.toString, dt, rowWriter) + if (input.isNull == FalseLiteral) { s""" - ${input.code} - ${writeField.trim} - """ + |${input.code} + |${writeField.trim} + """.stripMargin } else { s""" - ${input.code} - if (${input.isNull}) { - ${setNull.trim} - } else { - ${writeField.trim} - } - """ + |${input.code} + |if (${input.isNull}) { + | ${setNull.trim} + |} else { + | ${writeField.trim} + |} + """.stripMargin } } @@ -171,11 +136,10 @@ object GenerateUnsafeProjection extends CodeGenerator[Seq[Expression], UnsafePro funcName = "writeFields", arguments = Seq("InternalRow" -> row)) } - s""" - $resetWriter - $writeFieldsCode - """.trim + |$resetWriter + |$writeFieldsCode + """.stripMargin } // TODO: if the nullability of array element is correct, we can use it to save null check. 
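Both branches above lean on the same bookkeeping: variable-length values (structs, arrays, maps) are appended after the row's fixed-width region, and the field's fixed-width slot is then filled with a packed (offset, size) derived from two cursor snapshots. A minimal, self-contained sketch of that pattern in Scala (`ToyWriter` is a hypothetical stand-in for `UnsafeRowWriter`, not the real class):

    // A toy writer that only tracks a byte cursor, to show how (offset, size)
    // falls out of snapshotting the cursor before and after a nested write.
    class ToyWriter {
      private val buffer = new java.io.ByteArrayOutputStream()
      def cursor: Int = buffer.size()
      def writeBytes(bytes: Array[Byte]): Unit = buffer.write(bytes)
      def setOffsetAndSizeFromPreviousCursor(index: Int, previousCursor: Int): Unit = {
        val size = cursor - previousCursor
        println(s"field $index: offset=$previousCursor, size=$size")
      }
    }

    val writer = new ToyWriter
    val previousCursor = writer.cursor           // snapshot before the nested value
    writer.writeBytes(Array.fill(24)(0.toByte))  // write the nested value's bytes
    writer.setOffsetAndSizeFromPreviousCursor(0, previousCursor)
    // prints: field 0: offset=0, size=24

This is also why the `UnsafeRow` fast path can simply copy bytes: the (offset, size) arithmetic is the same whether the bytes came from field-by-field writes or from an existing unsafe buffer.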
@@ -189,10 +153,7 @@ object GenerateUnsafeProjection extends CodeGenerator[Seq[Expression], UnsafePro val numElements = ctx.freshName("numElements") val index = ctx.freshName("index") - val et = elementType match { - case udt: UserDefinedType[_] => udt.sqlType - case other => other - } + val et = UserDefinedType.sqlType(elementType) val jt = CodeGenerator.javaType(et) @@ -205,106 +166,100 @@ object GenerateUnsafeProjection extends CodeGenerator[Seq[Expression], UnsafePro val arrayWriterClass = classOf[UnsafeArrayWriter].getName val arrayWriter = ctx.addMutableState(arrayWriterClass, "arrayWriter", v => s"$v = new $arrayWriterClass($rowWriter, $elementOrOffsetSize);") - val previousCursor = ctx.freshName("previousCursor") val element = CodeGenerator.getValue(tmpInput, et, index) - val writeElement = et match { - case t: StructType => - s""" - final int $previousCursor = $arrayWriter.cursor(); - ${writeStructToBuffer(ctx, element, t.map(_.dataType), arrayWriter)} - $arrayWriter.setOffsetAndSizeFromPreviousCursor($index, $previousCursor); - """ - - case a @ ArrayType(et, _) => - s""" - final int $previousCursor = $arrayWriter.cursor(); - ${writeArrayToBuffer(ctx, element, et, arrayWriter)} - $arrayWriter.setOffsetAndSizeFromPreviousCursor($index, $previousCursor); - """ - - case m @ MapType(kt, vt, _) => - s""" - final int $previousCursor = $arrayWriter.cursor(); - ${writeMapToBuffer(ctx, element, kt, vt, arrayWriter)} - $arrayWriter.setOffsetAndSizeFromPreviousCursor($index, $previousCursor); - """ - - case t: DecimalType => - s"$arrayWriter.write($index, $element, ${t.precision}, ${t.scale});" - - case NullType => "" - - case _ => s"$arrayWriter.write($index, $element);" - } - val primitiveTypeName = - if (CodeGenerator.isPrimitiveType(jt)) CodeGenerator.primitiveTypeName(et) else "" s""" - final ArrayData $tmpInput = $input; - if ($tmpInput instanceof UnsafeArrayData) { - ${writeUnsafeData(ctx, s"((UnsafeArrayData) $tmpInput)", arrayWriter)} - } else { - final int $numElements = $tmpInput.numElements(); - $arrayWriter.initialize($numElements); - - for (int $index = 0; $index < $numElements; $index++) { - if ($tmpInput.isNullAt($index)) { - $arrayWriter.setNull${elementOrOffsetSize}Bytes($index); - } else { - $writeElement - } - } - } - """ + |final ArrayData $tmpInput = $input; + |if ($tmpInput instanceof UnsafeArrayData) { + | $rowWriter.write((UnsafeArrayData) $tmpInput); + |} else { + | final int $numElements = $tmpInput.numElements(); + | $arrayWriter.initialize($numElements); + | + | for (int $index = 0; $index < $numElements; $index++) { + | if ($tmpInput.isNullAt($index)) { + | $arrayWriter.setNull${elementOrOffsetSize}Bytes($index); + | } else { + | ${writeElement(ctx, element, index, et, arrayWriter)} + | } + | } + |} + """.stripMargin } // TODO: if the nullability of value element is correct, we can use it to save null check. private def writeMapToBuffer( ctx: CodegenContext, input: String, + index: String, keyType: DataType, valueType: DataType, rowWriter: String): String = { // Puts `input` in a local variable to avoid to re-evaluate it if it's a statement. val tmpInput = ctx.freshName("tmpInput") val tmpCursor = ctx.freshName("tmpCursor") + val previousCursor = ctx.freshName("previousCursor") // Writes out unsafe map according to the format described in `UnsafeMapData`. 
s""" - final MapData $tmpInput = $input; - if ($tmpInput instanceof UnsafeMapData) { - ${writeUnsafeData(ctx, s"((UnsafeMapData) $tmpInput)", rowWriter)} - } else { - // preserve 8 bytes to write the key array numBytes later. - $rowWriter.grow(8); - $rowWriter.increaseCursor(8); + |final MapData $tmpInput = $input; + |if ($tmpInput instanceof UnsafeMapData) { + | $rowWriter.write($index, (UnsafeMapData) $tmpInput); + |} else { + | // Remember the current cursor so that we can calculate how many bytes are + | // written later. + | final int $previousCursor = $rowWriter.cursor(); + | + | // preserve 8 bytes to write the key array numBytes later. + | $rowWriter.grow(8); + | $rowWriter.increaseCursor(8); + | + | // Remember the current cursor so that we can write numBytes of key array later. + | final int $tmpCursor = $rowWriter.cursor(); + | + | ${writeArrayToBuffer(ctx, s"$tmpInput.keyArray()", keyType, rowWriter)} + | + | // Write the numBytes of key array into the first 8 bytes. + | Platform.putLong( + | $rowWriter.getBuffer(), + | $tmpCursor - 8, + | $rowWriter.cursor() - $tmpCursor); + | + | ${writeArrayToBuffer(ctx, s"$tmpInput.valueArray()", valueType, rowWriter)} + | $rowWriter.setOffsetAndSizeFromPreviousCursor($index, $previousCursor); + |} + """.stripMargin + } - // Remember the current cursor so that we can write numBytes of key array later. - final int $tmpCursor = $rowWriter.cursor(); + private def writeElement( + ctx: CodegenContext, + input: String, + index: String, + dt: DataType, + writer: String): String = dt match { + case t: StructType => + writeStructToBuffer(ctx, input, index, t.map(_.dataType), writer) + + case ArrayType(et, _) => + val previousCursor = ctx.freshName("previousCursor") + s""" + |// Remember the current cursor so that we can calculate how many bytes are + |// written later. + |final int $previousCursor = $writer.cursor(); + |${writeArrayToBuffer(ctx, input, et, writer)} + |$writer.setOffsetAndSizeFromPreviousCursor($index, $previousCursor); + """.stripMargin - ${writeArrayToBuffer(ctx, s"$tmpInput.keyArray()", keyType, rowWriter)} - // Write the numBytes of key array into the first 8 bytes. - Platform.putLong($rowWriter.getBuffer(), $tmpCursor - 8, $rowWriter.cursor() - $tmpCursor); + case MapType(kt, vt, _) => + writeMapToBuffer(ctx, input, index, kt, vt, writer) - ${writeArrayToBuffer(ctx, s"$tmpInput.valueArray()", valueType, rowWriter)} - } - """ - } + case DecimalType.Fixed(precision, scale) => + s"$writer.write($index, $input, $precision, $scale);" - /** - * If the input is already in unsafe format, we don't need to go through all elements/fields, - * we can directly write it. - */ - private def writeUnsafeData(ctx: CodegenContext, input: String, rowWriter: String) = { - val sizeInBytes = ctx.freshName("sizeInBytes") - s""" - final int $sizeInBytes = $input.getSizeInBytes(); - // grow the global buffer before writing data. - $rowWriter.grow($sizeInBytes); - $input.writeToMemory($rowWriter.getBuffer(), $rowWriter.cursor()); - $rowWriter.increaseCursor($sizeInBytes); - """ + case NullType => "" + + case _ => s"$writer.write($index, $input);" } def createCode( @@ -332,13 +287,12 @@ object GenerateUnsafeProjection extends CodeGenerator[Seq[Expression], UnsafePro val code = s""" - $rowWriter.reset(); - $evalSubexpr - $writeExpressions - """ + |$rowWriter.reset(); + |$evalSubexpr + |$writeExpressions + """.stripMargin // `rowWriter` is declared as a class field, so we can access it directly in methods. 
- ExprCode(code, FalseLiteral, StatementValue(s"$rowWriter.getRow()", "UnsafeRow", - canDirectAccess = true)) + ExprCode(code, FalseLiteral, JavaCode.expression(s"$rowWriter.getRow()", classOf[UnsafeRow])) } protected def canonicalize(in: Seq[Expression]): Seq[Expression] = @@ -363,38 +317,39 @@ object GenerateUnsafeProjection extends CodeGenerator[Seq[Expression], UnsafePro val ctx = newCodeGenContext() val eval = createCode(ctx, expressions, subexpressionEliminationEnabled) - val codeBody = s""" - public java.lang.Object generate(Object[] references) { - return new SpecificUnsafeProjection(references); - } - - class SpecificUnsafeProjection extends ${classOf[UnsafeProjection].getName} { - - private Object[] references; - ${ctx.declareMutableStates()} - - public SpecificUnsafeProjection(Object[] references) { - this.references = references; - ${ctx.initMutableStates()} - } - - public void initialize(int partitionIndex) { - ${ctx.initPartition()} - } - - // Scala.Function1 need this - public java.lang.Object apply(java.lang.Object row) { - return apply((InternalRow) row); - } - - public UnsafeRow apply(InternalRow ${ctx.INPUT_ROW}) { - ${eval.code.trim} - return ${eval.value}; - } - - ${ctx.declareAddedFunctions()} - } - """ + val codeBody = + s""" + |public java.lang.Object generate(Object[] references) { + | return new SpecificUnsafeProjection(references); + |} + | + |class SpecificUnsafeProjection extends ${classOf[UnsafeProjection].getName} { + | + | private Object[] references; + | ${ctx.declareMutableStates()} + | + | public SpecificUnsafeProjection(Object[] references) { + | this.references = references; + | ${ctx.initMutableStates()} + | } + | + | public void initialize(int partitionIndex) { + | ${ctx.initPartition()} + | } + | + | // Scala.Function1 need this + | public java.lang.Object apply(java.lang.Object row) { + | return apply((InternalRow) row); + | } + | + | public UnsafeRow apply(InternalRow ${ctx.INPUT_ROW}) { + | ${eval.code.trim} + | return ${eval.value}; + | } + | + | ${ctx.declareAddedFunctions()} + |} + """.stripMargin val code = CodeFormatter.stripOverlappingComments( new CodeAndComment(codeBody, ctx.getPlaceHolderToComments())) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/javaCode.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/javaCode.scala new file mode 100644 index 0000000000000..74ff018488863 --- /dev/null +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/javaCode.scala @@ -0,0 +1,166 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+package org.apache.spark.sql.catalyst.expressions.codegen
+
+import java.lang.{Boolean => JBool}
+
+import scala.language.{existentials, implicitConversions}
+
+import org.apache.spark.sql.types.{BooleanType, DataType}
+
+/**
+ * Trait representing an opaque fragment of java code.
+ */
+trait JavaCode {
+  def code: String
+  override def toString: String = code
+}
+
+/**
+ * Utility functions for creating [[JavaCode]] fragments.
+ */
+object JavaCode {
+  /**
+   * Create a java literal.
+   */
+  def literal(v: String, dataType: DataType): LiteralValue = dataType match {
+    case BooleanType if v == "true" => TrueLiteral
+    case BooleanType if v == "false" => FalseLiteral
+    case _ => new LiteralValue(v, CodeGenerator.javaClass(dataType))
+  }
+
+  /**
+   * Create a default literal. This is null for reference types, false for boolean types, and
+   * -1 for other primitive types.
+   */
+  def defaultLiteral(dataType: DataType): LiteralValue = {
+    new LiteralValue(
+      CodeGenerator.defaultValue(dataType, typedNull = true),
+      CodeGenerator.javaClass(dataType))
+  }
+
+  /**
+   * Create a local java variable.
+   */
+  def variable(name: String, dataType: DataType): VariableValue = {
+    variable(name, CodeGenerator.javaClass(dataType))
+  }
+
+  /**
+   * Create a local java variable.
+   */
+  def variable(name: String, javaClass: Class[_]): VariableValue = {
+    VariableValue(name, javaClass)
+  }
+
+  /**
+   * Create a local isNull variable.
+   */
+  def isNullVariable(name: String): VariableValue = variable(name, BooleanType)
+
+  /**
+   * Create a global java variable.
+   */
+  def global(name: String, dataType: DataType): GlobalValue = {
+    global(name, CodeGenerator.javaClass(dataType))
+  }
+
+  /**
+   * Create a global java variable.
+   */
+  def global(name: String, javaClass: Class[_]): GlobalValue = {
+    GlobalValue(name, javaClass)
+  }
+
+  /**
+   * Create a global isNull variable.
+   */
+  def isNullGlobal(name: String): GlobalValue = global(name, BooleanType)
+
+  /**
+   * Create an expression fragment.
+   */
+  def expression(code: String, dataType: DataType): SimpleExprValue = {
+    expression(code, CodeGenerator.javaClass(dataType))
+  }
+
+  /**
+   * Create an expression fragment.
+   */
+  def expression(code: String, javaClass: Class[_]): SimpleExprValue = {
+    SimpleExprValue(code, javaClass)
+  }
+
+  /**
+   * Create an isNull expression fragment.
+   */
+  def isNullExpression(code: String): SimpleExprValue = {
+    expression(code, BooleanType)
+  }
+}
+
+/**
+ * A typed java fragment that must be a valid java expression.
+ */
+trait ExprValue extends JavaCode {
+  def javaType: Class[_]
+  def isPrimitive: Boolean = javaType.isPrimitive
+}
+
+object ExprValue {
+  implicit def exprValueToString(exprValue: ExprValue): String = exprValue.toString
+}
+
+
+/**
+ * A java expression fragment.
+ */
+case class SimpleExprValue(expr: String, javaType: Class[_]) extends ExprValue {
+  override def code: String = s"($expr)"
+}
+
+/**
+ * A local variable java expression.
+ */
+case class VariableValue(variableName: String, javaType: Class[_]) extends ExprValue {
+  override def code: String = variableName
+}
+
+/**
+ * A global variable java expression.
+ */
+case class GlobalValue(value: String, javaType: Class[_]) extends ExprValue {
+  override def code: String = value
+}
+
+/**
+ * A literal java expression.
+ */ +class LiteralValue(val value: String, val javaType: Class[_]) extends ExprValue with Serializable { + override def code: String = value + + override def equals(arg: Any): Boolean = arg match { + case l: LiteralValue => l.javaType == javaType && l.value == value + case _ => false + } + + override def hashCode(): Int = value.hashCode() * 31 + javaType.hashCode() +} + +case object TrueLiteral extends LiteralValue("true", JBool.TYPE) +case object FalseLiteral extends LiteralValue("false", JBool.TYPE) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala index c3e78935386f7..a43376c0a66d6 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala @@ -21,7 +21,7 @@ import java.util.Comparator import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.analysis.TypeCheckResult import org.apache.spark.sql.catalyst.expressions.codegen._ -import org.apache.spark.sql.catalyst.util.{ArrayData, GenericArrayData, MapData} +import org.apache.spark.sql.catalyst.util.{ArrayData, GenericArrayData, MapData, TypeUtils} import org.apache.spark.sql.types._ /** @@ -288,7 +288,6 @@ case class ArrayContains(left: Expression, right: Expression) override def prettyName: String = "array_contains" } - /** * Checks if the two arrays contain at least one common element. */ @@ -396,3 +395,133 @@ case class ArraysOverlap(left: Expression, right: Expression) }) } } + +/** + * Returns the minimum value in the array. + */ +@ExpressionDescription( + usage = "_FUNC_(array) - Returns the minimum value in the array. 
NULL elements are skipped.", + examples = """ + Examples: + > SELECT _FUNC_(array(1, 20, null, 3)); + 1 + """, since = "2.4.0") +case class ArrayMin(child: Expression) extends UnaryExpression with ImplicitCastInputTypes { + + override def nullable: Boolean = true + + override def inputTypes: Seq[AbstractDataType] = Seq(ArrayType) + + private lazy val ordering = TypeUtils.getInterpretedOrdering(dataType) + + override def checkInputDataTypes(): TypeCheckResult = { + val typeCheckResult = super.checkInputDataTypes() + if (typeCheckResult.isSuccess) { + TypeUtils.checkForOrderingExpr(dataType, s"function $prettyName") + } else { + typeCheckResult + } + } + + override protected def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { + val childGen = child.genCode(ctx) + val javaType = CodeGenerator.javaType(dataType) + val i = ctx.freshName("i") + val item = ExprCode("", + isNull = JavaCode.isNullExpression(s"${childGen.value}.isNullAt($i)"), + value = JavaCode.expression(CodeGenerator.getValue(childGen.value, dataType, i), dataType)) + ev.copy(code = + s""" + |${childGen.code} + |boolean ${ev.isNull} = true; + |$javaType ${ev.value} = ${CodeGenerator.defaultValue(dataType)}; + |if (!${childGen.isNull}) { + | for (int $i = 0; $i < ${childGen.value}.numElements(); $i ++) { + | ${ctx.reassignIfSmaller(dataType, ev, item)} + | } + |} + """.stripMargin) + } + + override protected def nullSafeEval(input: Any): Any = { + var min: Any = null + input.asInstanceOf[ArrayData].foreach(dataType, (_, item) => + if (item != null && (min == null || ordering.lt(item, min))) { + min = item + } + ) + min + } + + override def dataType: DataType = child.dataType match { + case ArrayType(dt, _) => dt + case _ => throw new IllegalStateException(s"$prettyName accepts only arrays.") + } + + override def prettyName: String = "array_min" +} + +/** + * Returns the maximum value in the array. + */ +@ExpressionDescription( + usage = "_FUNC_(array) - Returns the maximum value in the array. 
NULL elements are skipped.", + examples = """ + Examples: + > SELECT _FUNC_(array(1, 20, null, 3)); + 20 + """, since = "2.4.0") +case class ArrayMax(child: Expression) extends UnaryExpression with ImplicitCastInputTypes { + + override def nullable: Boolean = true + + override def inputTypes: Seq[AbstractDataType] = Seq(ArrayType) + + private lazy val ordering = TypeUtils.getInterpretedOrdering(dataType) + + override def checkInputDataTypes(): TypeCheckResult = { + val typeCheckResult = super.checkInputDataTypes() + if (typeCheckResult.isSuccess) { + TypeUtils.checkForOrderingExpr(dataType, s"function $prettyName") + } else { + typeCheckResult + } + } + + override protected def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { + val childGen = child.genCode(ctx) + val javaType = CodeGenerator.javaType(dataType) + val i = ctx.freshName("i") + val item = ExprCode("", + isNull = JavaCode.isNullExpression(s"${childGen.value}.isNullAt($i)"), + value = JavaCode.expression(CodeGenerator.getValue(childGen.value, dataType, i), dataType)) + ev.copy(code = + s""" + |${childGen.code} + |boolean ${ev.isNull} = true; + |$javaType ${ev.value} = ${CodeGenerator.defaultValue(dataType)}; + |if (!${childGen.isNull}) { + | for (int $i = 0; $i < ${childGen.value}.numElements(); $i ++) { + | ${ctx.reassignIfGreater(dataType, ev, item)} + | } + |} + """.stripMargin) + } + + override protected def nullSafeEval(input: Any): Any = { + var max: Any = null + input.asInstanceOf[ArrayData].foreach(dataType, (_, item) => + if (item != null && (max == null || ordering.gt(item, max))) { + max = item + } + ) + max + } + + override def dataType: DataType = child.dataType match { + case ArrayType(dt, _) => dt + case _ => throw new IllegalStateException(s"$prettyName accepts only arrays.") + } + + override def prettyName: String = "array_max" +} diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala index 49a8d12057188..67876a8565488 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala @@ -64,7 +64,7 @@ case class CreateArray(children: Seq[Expression]) extends Expression { GenArrayData.genCodeToCreateArrayData(ctx, et, evals, false) ev.copy( code = preprocess + assigns + postprocess, - value = VariableValue(arrayData, CodeGenerator.javaType(dataType)), + value = JavaCode.variable(arrayData, dataType), isNull = FalseLiteral) } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/conditionalExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/conditionalExpressions.scala index 409c0b6b79b81..205d77f6a9acf 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/conditionalExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/conditionalExpressions.scala @@ -191,8 +191,9 @@ case class CaseWhen( // It is initialized to `NOT_MATCHED`, and if it's set to `HAS_NULL` or `HAS_NONNULL`, // We won't go on anymore on the computation. 
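The `CaseWhen` change just below is typical of how call sites migrate to the factories added in javaCode.scala: rather than hand-assembling a `GlobalValue` from a name and a java-type string, the caller passes a `DataType` and gets back a statically typed `ExprValue`. An illustrative round-trip through the new API (the variable names are made up; this assumes access to the catalyst codegen package):

    import org.apache.spark.sql.catalyst.expressions.codegen._
    import org.apache.spark.sql.types._

    val local  = JavaCode.variable("value_0", IntegerType)    // int local variable
    val isNull = JavaCode.isNullVariable("isNull_0")          // boolean local variable
    val expr   = JavaCode.expression("input.getInt(0)", IntegerType)

    assert(local.isPrimitive)                         // backed by java.lang.Integer.TYPE
    assert(expr.code == "(input.getInt(0))")          // expressions are parenthesized
    assert(JavaCode.literal("true", BooleanType) eq TrueLiteral)  // booleans are interned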
val resultState = ctx.freshName("caseWhenResultState") - ev.value = GlobalValue(ctx.addMutableState(CodeGenerator.javaType(dataType), ev.value), - CodeGenerator.javaType(dataType)) + ev.value = JavaCode.global( + ctx.addMutableState(CodeGenerator.javaType(dataType), ev.value), + dataType) // these blocks are meant to be inside a // do { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala index 49dd988b4b53c..b9b2cd5bdb9f0 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala @@ -426,36 +426,71 @@ case class DayOfMonth(child: Expression) extends UnaryExpression with ImplicitCa """, since = "2.3.0") // scalastyle:on line.size.limit -case class DayOfWeek(child: Expression) extends UnaryExpression with ImplicitCastInputTypes { +case class DayOfWeek(child: Expression) extends DayWeek { - override def inputTypes: Seq[AbstractDataType] = Seq(DateType) - - override def dataType: DataType = IntegerType + override protected def nullSafeEval(date: Any): Any = { + cal.setTimeInMillis(date.asInstanceOf[Int] * 1000L * 3600L * 24L) + cal.get(Calendar.DAY_OF_WEEK) + } - @transient private lazy val c = { - Calendar.getInstance(DateTimeUtils.getTimeZone("UTC")) + override protected def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { + nullSafeCodeGen(ctx, ev, time => { + val cal = classOf[Calendar].getName + val dtu = DateTimeUtils.getClass.getName.stripSuffix("$") + val c = "calDayOfWeek" + ctx.addImmutableStateIfNotExists(cal, c, + v => s"""$v = $cal.getInstance($dtu.getTimeZone("UTC"));""") + s""" + $c.setTimeInMillis($time * 1000L * 3600L * 24L); + ${ev.value} = $c.get($cal.DAY_OF_WEEK); + """ + }) } +} + +// scalastyle:off line.size.limit +@ExpressionDescription( + usage = "_FUNC_(date) - Returns the day of the week for date/timestamp (0 = Monday, 1 = Tuesday, ..., 6 = Sunday).", + examples = """ + Examples: + > SELECT _FUNC_('2009-07-30'); + 3 + """, + since = "2.4.0") +// scalastyle:on line.size.limit +case class WeekDay(child: Expression) extends DayWeek { override protected def nullSafeEval(date: Any): Any = { - c.setTimeInMillis(date.asInstanceOf[Int] * 1000L * 3600L * 24L) - c.get(Calendar.DAY_OF_WEEK) + cal.setTimeInMillis(date.asInstanceOf[Int] * 1000L * 3600L * 24L) + (cal.get(Calendar.DAY_OF_WEEK) + 5 ) % 7 } override protected def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { nullSafeCodeGen(ctx, ev, time => { val cal = classOf[Calendar].getName val dtu = DateTimeUtils.getClass.getName.stripSuffix("$") - val c = "calDayOfWeek" + val c = "calWeekDay" ctx.addImmutableStateIfNotExists(cal, c, v => s"""$v = $cal.getInstance($dtu.getTimeZone("UTC"));""") s""" $c.setTimeInMillis($time * 1000L * 3600L * 24L); - ${ev.value} = $c.get($cal.DAY_OF_WEEK); + ${ev.value} = ($c.get($cal.DAY_OF_WEEK) + 5) % 7; """ }) } } +abstract class DayWeek extends UnaryExpression with ImplicitCastInputTypes { + + override def inputTypes: Seq[AbstractDataType] = Seq(DateType) + + override def dataType: DataType = IntegerType + + @transient protected lazy val cal: Calendar = { + Calendar.getInstance(DateTimeUtils.getTimeZone("UTC")) + } +} + // scalastyle:off line.size.limit @ExpressionDescription( usage = "_FUNC_(date) - Returns the week of the year of the given date. 
A week is considered to start on a Monday and week 1 is the first week with >3 days.", @@ -813,8 +848,7 @@ case class FromUnixTime(sec: Expression, format: Expression, timeZoneId: Option[ val df = classOf[DateFormat].getName if (format.foldable) { if (formatter == null) { - ExprCode("", TrueLiteral, LiteralValue("(UTF8String) null", - CodeGenerator.javaType(dataType))) + ExprCode.forNullValue(StringType) } else { val formatterName = ctx.addReferenceObj("formatter", formatter, df) val t = left.genCode(ctx) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/literals.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/literals.scala index 742a650eb445d..246025b82d59e 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/literals.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/literals.scala @@ -281,38 +281,41 @@ case class Literal (value: Any, dataType: DataType) extends LeafExpression { if (value == null) { ExprCode.forNullValue(dataType) } else { + def toExprCode(code: String): ExprCode = { + ExprCode.forNonNullValue(JavaCode.literal(code, dataType)) + } dataType match { case BooleanType | IntegerType | DateType => - ExprCode.forNonNullValue(LiteralValue(value.toString, javaType)) + toExprCode(value.toString) case FloatType => value.asInstanceOf[Float] match { case v if v.isNaN => - ExprCode.forNonNullValue(LiteralValue("Float.NaN", javaType)) + toExprCode("Float.NaN") case Float.PositiveInfinity => - ExprCode.forNonNullValue(LiteralValue("Float.POSITIVE_INFINITY", javaType)) + toExprCode("Float.POSITIVE_INFINITY") case Float.NegativeInfinity => - ExprCode.forNonNullValue(LiteralValue("Float.NEGATIVE_INFINITY", javaType)) + toExprCode("Float.NEGATIVE_INFINITY") case _ => - ExprCode.forNonNullValue(LiteralValue(s"${value}F", javaType)) + toExprCode(s"${value}F") } case DoubleType => value.asInstanceOf[Double] match { case v if v.isNaN => - ExprCode.forNonNullValue(LiteralValue("Double.NaN", javaType)) + toExprCode("Double.NaN") case Double.PositiveInfinity => - ExprCode.forNonNullValue(LiteralValue("Double.POSITIVE_INFINITY", javaType)) + toExprCode("Double.POSITIVE_INFINITY") case Double.NegativeInfinity => - ExprCode.forNonNullValue(LiteralValue("Double.NEGATIVE_INFINITY", javaType)) + toExprCode("Double.NEGATIVE_INFINITY") case _ => - ExprCode.forNonNullValue(LiteralValue(s"${value}D", javaType)) + toExprCode(s"${value}D") } case ByteType | ShortType => - ExprCode.forNonNullValue(LiteralValue(s"($javaType)$value", javaType)) + ExprCode.forNonNullValue(JavaCode.expression(s"($javaType)$value", dataType)) case TimestampType | LongType => - ExprCode.forNonNullValue(LiteralValue(s"${value}L", javaType)) + toExprCode(s"${value}L") case _ => val constRef = ctx.addReferenceObj("literal", value, javaType) - ExprCode.forNonNullValue(GlobalValue(constRef, javaType)) + ExprCode.forNonNullValue(JavaCode.global(constRef, dataType)) } } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/misc.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/misc.scala index 7081a5e096d56..7eda65a867028 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/misc.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/misc.scala @@ -92,7 +92,7 @@ case class AssertTrue(child: Expression) extends UnaryExpression with ImplicitCa |if (${eval.isNull} || !${eval.value}) { | throw new 
RuntimeException($errMsgField);
        |}""".stripMargin, isNull = TrueLiteral,
-      value = LiteralValue("null", CodeGenerator.javaType(dataType)))
+      value = JavaCode.defaultLiteral(dataType))
  }

  override def sql: String = s"assert_true(${child.sql})"
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/nullExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/nullExpressions.scala
index 55b6e346be82a..0787342bce6bc 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/nullExpressions.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/nullExpressions.scala
@@ -72,8 +72,7 @@ case class Coalesce(children: Seq[Expression]) extends Expression {
  }

  override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
-    ev.isNull = GlobalValue(ctx.addMutableState(CodeGenerator.JAVA_BOOLEAN, ev.isNull),
-      CodeGenerator.JAVA_BOOLEAN)
+    ev.isNull = JavaCode.isNullGlobal(ctx.addMutableState(CodeGenerator.JAVA_BOOLEAN, ev.isNull))

    // all the evals are meant to be in a do { ... } while (false); loop
    val evals = children.map { e =>
@@ -321,12 +320,7 @@ case class IsNull(child: Expression) extends UnaryExpression with Predicate {

  override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
    val eval = child.genCode(ctx)
-    val value = if (eval.isNull.isInstanceOf[LiteralValue]) {
-      LiteralValue(eval.isNull, CodeGenerator.JAVA_BOOLEAN)
-    } else {
-      VariableValue(eval.isNull, CodeGenerator.JAVA_BOOLEAN)
-    }
-    ExprCode(code = eval.code, isNull = FalseLiteral, value = value)
+    ExprCode(code = eval.code, isNull = FalseLiteral, value = eval.isNull)
  }

  override def sql: String = s"(${child.sql} IS NULL)"
@@ -352,12 +346,10 @@ case class IsNotNull(child: Expression) extends UnaryExpression with Predicate {

  override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
    val eval = child.genCode(ctx)
-    val value = if (eval.isNull == TrueLiteral) {
-      FalseLiteral
-    } else if (eval.isNull == FalseLiteral) {
-      TrueLiteral
-    } else {
-      StatementValue(s"(!(${eval.isNull}))", CodeGenerator.javaType(dataType))
+    val value = eval.isNull match {
+      case TrueLiteral => FalseLiteral
+      case FalseLiteral => TrueLiteral
+      case v => JavaCode.isNullExpression(s"!$v")
    }
    ExprCode(code = eval.code, isNull = FalseLiteral, value = value)
  }
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala
index b2cca3178cd2a..77802e89e942b 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala
@@ -65,7 +65,7 @@ trait InvokeLike extends Expression with NonSQLExpression {

    val resultIsNull = if (needNullCheck) {
      val resultIsNull = ctx.addMutableState(CodeGenerator.JAVA_BOOLEAN, "resultIsNull")
-      GlobalValue(resultIsNull, CodeGenerator.JAVA_BOOLEAN)
+      JavaCode.isNullGlobal(resultIsNull)
    } else {
      FalseLiteral
    }
@@ -560,21 +560,26 @@ case class LambdaVariable(
    dataType: DataType,
    nullable: Boolean = true) extends LeafExpression with NonSQLExpression {

+  private val accessor: (InternalRow, Int) => Any = InternalRow.getAccessor(dataType)
+
  // Interpreted execution of `LambdaVariable` always gets the 0-index element from input row.
override def eval(input: InternalRow): Any = {
    assert(input.numFields == 1,
      "The input row of interpreted LambdaVariable should have only 1 field.")
-    input.get(0, dataType)
+    if (nullable && input.isNullAt(0)) {
+      null
+    } else {
+      accessor(input, 0)
+    }
  }

  override def genCode(ctx: CodegenContext): ExprCode = {
    val isNullValue = if (nullable) {
-      VariableValue(isNull, CodeGenerator.JAVA_BOOLEAN)
+      JavaCode.isNullVariable(isNull)
    } else {
      FalseLiteral
    }
-    ExprCode(code = "", value = VariableValue(value, CodeGenerator.javaType(dataType)),
-      isNull = isNullValue)
+    ExprCode(value = JavaCode.variable(value, dataType), isNull = isNullValue)
  }

  // This won't be called as `genCode` is overridden, just overriding it to make
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
index 9a1bbc675e397..5fb59ef350b8b 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
@@ -138,6 +138,8 @@ abstract class Optimizer(sessionCatalog: SessionCatalog)
      operatorOptimizationBatch) :+
    Batch("Join Reorder", Once,
      CostBasedJoinReorder) :+
+    Batch("Remove Redundant Sorts", Once,
+      RemoveRedundantSorts) :+
    Batch("Decimal Optimizations", fixedPoint,
      DecimalAggregates) :+
    Batch("Object Expressions Optimization", fixedPoint,
@@ -733,6 +735,16 @@ object EliminateSorts extends Rule[LogicalPlan] {
  }
}

+/**
+ * Removes a Sort operation if the child is already sorted.
+ */
+object RemoveRedundantSorts extends Rule[LogicalPlan] {
+  def apply(plan: LogicalPlan): LogicalPlan = plan transform {
+    case Sort(orders, true, child) if SortOrder.orderingSatisfies(child.outputOrdering, orders) =>
+      child
+  }
+}
+
/**
 * Removes filters that can be evaluated trivially. This can be done through the following ways:
 * 1) by eliding the filter for cases where it will always evaluate to `true`.
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala
index c8ccd9bd03994..42034403d6d03 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala
@@ -219,6 +219,11 @@ abstract class LogicalPlan
   * Refreshes (or invalidates) any metadata/data cached in the plan recursively.
   */
  def refresh(): Unit = children.foreach(_.refresh())
+
+  /**
+   * Returns the output ordering that this plan generates.
+ */ + def outputOrdering: Seq[SortOrder] = Nil } /** @@ -274,3 +279,7 @@ abstract class BinaryNode extends LogicalPlan { override final def children: Seq[LogicalPlan] = Seq(left, right) } + +abstract class OrderPreservingUnaryNode extends UnaryNode { + override final def outputOrdering: Seq[SortOrder] = child.outputOrdering +} diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala index a4fca790dd086..10df504795430 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala @@ -43,11 +43,12 @@ case class ReturnAnswer(child: LogicalPlan) extends UnaryNode { * This node is inserted at the top of a subquery when it is optimized. This makes sure we can * recognize a subquery as such, and it allows us to write subquery aware transformations. */ -case class Subquery(child: LogicalPlan) extends UnaryNode { +case class Subquery(child: LogicalPlan) extends OrderPreservingUnaryNode { override def output: Seq[Attribute] = child.output } -case class Project(projectList: Seq[NamedExpression], child: LogicalPlan) extends UnaryNode { +case class Project(projectList: Seq[NamedExpression], child: LogicalPlan) + extends OrderPreservingUnaryNode { override def output: Seq[Attribute] = projectList.map(_.toAttribute) override def maxRows: Option[Long] = child.maxRows @@ -125,7 +126,7 @@ case class Generate( } case class Filter(condition: Expression, child: LogicalPlan) - extends UnaryNode with PredicateHelper { + extends OrderPreservingUnaryNode with PredicateHelper { override def output: Seq[Attribute] = child.output override def maxRows: Option[Long] = child.maxRows @@ -469,6 +470,7 @@ case class Sort( child: LogicalPlan) extends UnaryNode { override def output: Seq[Attribute] = child.output override def maxRows: Option[Long] = child.maxRows + override def outputOrdering: Seq[SortOrder] = order } /** Factory for constructing new `Range` nodes. */ @@ -522,6 +524,15 @@ case class Range( override def computeStats(): Statistics = { Statistics(sizeInBytes = LongType.defaultSize * numElements) } + + override def outputOrdering: Seq[SortOrder] = { + val order = if (step > 0) { + Ascending + } else { + Descending + } + output.map(a => SortOrder(a, order)) + } } case class Aggregate( @@ -728,7 +739,7 @@ object Limit { * * See [[Limit]] for more information. */ -case class GlobalLimit(limitExpr: Expression, child: LogicalPlan) extends UnaryNode { +case class GlobalLimit(limitExpr: Expression, child: LogicalPlan) extends OrderPreservingUnaryNode { override def output: Seq[Attribute] = child.output override def maxRows: Option[Long] = { limitExpr match { @@ -744,7 +755,7 @@ case class GlobalLimit(limitExpr: Expression, child: LogicalPlan) extends UnaryN * * See [[Limit]] for more information. 
*/ -case class LocalLimit(limitExpr: Expression, child: LogicalPlan) extends UnaryNode { +case class LocalLimit(limitExpr: Expression, child: LogicalPlan) extends OrderPreservingUnaryNode { override def output: Seq[Attribute] = child.output override def maxRowsPerPartition: Option[Long] = { @@ -764,7 +775,7 @@ case class LocalLimit(limitExpr: Expression, child: LogicalPlan) extends UnaryNo case class SubqueryAlias( alias: String, child: LogicalPlan) - extends UnaryNode { + extends OrderPreservingUnaryNode { override def doCanonicalize(): LogicalPlan = child.canonicalized diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index 1c8ab9c62623e..0dc47bfe075d0 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -930,6 +930,13 @@ object SQLConf { .intConf .createWithDefault(100) + val STREAMING_CHECKPOINT_FILE_MANAGER_CLASS = + buildConf("spark.sql.streaming.checkpointFileManagerClass") + .doc("The class used to write checkpoint files atomically. This class must be a subclass " + + "of the interface CheckpointFileManager.") + .internal() + .stringConf + val NDV_MAX_ERROR = buildConf("spark.sql.statistics.ndv.maxError") .internal() diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/UserDefinedType.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/UserDefinedType.scala index 5a944e763e099..6af16e2dba105 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/UserDefinedType.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/UserDefinedType.scala @@ -97,6 +97,16 @@ abstract class UserDefinedType[UserType >: Null] extends DataType with Serializa override def catalogString: String = sqlType.simpleString } +private[spark] object UserDefinedType { + /** + * Get the sqlType of a (potential) [[UserDefinedType]]. + */ + def sqlType(dt: DataType): DataType = dt match { + case udt: UserDefinedType[_] => udt.sqlType + case _ => dt + } +} + /** * The user defined type in Python. 
* diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CodeGenerationSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CodeGenerationSuite.scala index 8e83b35c3809c..f7c023111ff59 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CodeGenerationSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CodeGenerationSuite.scala @@ -448,8 +448,9 @@ class CodeGenerationSuite extends SparkFunSuite with ExpressionEvalHelper { val ref = BoundReference(0, IntegerType, true) val add1 = Add(ref, ref) val add2 = Add(add1, add1) - val dummy = SubExprEliminationState(VariableValue("dummy", "boolean"), - VariableValue("dummy", "boolean")) + val dummy = SubExprEliminationState( + JavaCode.variable("dummy", BooleanType), + JavaCode.variable("dummy", BooleanType)) // raw testing of basic functionality { diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CollectionExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CollectionExpressionsSuite.scala index 2e93d6f2533f1..b159f56204909 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CollectionExpressionsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CollectionExpressionsSuite.scala @@ -128,6 +128,25 @@ class CollectionExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper checkEvaluation(ArraysOverlap(a5, a6), true) checkEvaluation(ArraysOverlap(a5, a7), null) checkEvaluation(ArraysOverlap(a6, a7), false) + } + + test("Array Min") { + checkEvaluation(ArrayMin(Literal.create(Seq(-11, 10, 2), ArrayType(IntegerType))), -11) + checkEvaluation( + ArrayMin(Literal.create(Seq[String](null, "abc", ""), ArrayType(StringType))), "") + checkEvaluation(ArrayMin(Literal.create(Seq(null), ArrayType(LongType))), null) + checkEvaluation(ArrayMin(Literal.create(null, ArrayType(StringType))), null) + checkEvaluation( + ArrayMin(Literal.create(Seq(1.123, 0.1234, 1.121), ArrayType(DoubleType))), 0.1234) + } + test("Array max") { + checkEvaluation(ArrayMax(Literal.create(Seq(1, 10, 2), ArrayType(IntegerType))), 10) + checkEvaluation( + ArrayMax(Literal.create(Seq[String](null, "abc", ""), ArrayType(StringType))), "abc") + checkEvaluation(ArrayMax(Literal.create(Seq(null), ArrayType(LongType))), null) + checkEvaluation(ArrayMax(Literal.create(null, ArrayType(StringType))), null) + checkEvaluation( + ArrayMax(Literal.create(Seq(1.123, 0.1234, 1.121), ArrayType(DoubleType))), 1.123) } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala index 786266a2c13c0..080ec487cfa6a 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala @@ -211,6 +211,17 @@ class DateExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { checkConsistencyBetweenInterpretedAndCodegen(DayOfWeek, DateType) } + test("WeekDay") { + checkEvaluation(WeekDay(Literal.create(null, DateType)), null) + checkEvaluation(WeekDay(Literal(d)), 2) + checkEvaluation(WeekDay(Cast(Literal(sdfDate.format(d)), DateType, gmtId)), 2) + checkEvaluation(WeekDay(Cast(Literal(ts), DateType, gmtId)), 4) + 
checkEvaluation(WeekDay(Cast(Literal("2011-05-06"), DateType, gmtId)), 4) + checkEvaluation(WeekDay(Literal(new Date(sdf.parse("2017-05-27 13:10:15").getTime))), 5) + checkEvaluation(WeekDay(Literal(new Date(sdf.parse("1582-10-15 13:10:15").getTime))), 4) + checkConsistencyBetweenInterpretedAndCodegen(WeekDay, DateType) + } + test("WeekOfYear") { checkEvaluation(WeekOfYear(Literal.create(null, DateType)), null) checkEvaluation(WeekOfYear(Literal(d)), 15) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvalHelper.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvalHelper.scala index a5ecd1b68fac4..b4bf6d7107d7e 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvalHelper.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvalHelper.scala @@ -70,7 +70,9 @@ trait ExpressionEvalHelper extends GeneratorDrivenPropertyChecks { * Check the equality between result of expression and expected value, it will handle * Array[Byte], Spread[Double], MapData and Row. */ - protected def checkResult(result: Any, expected: Any, dataType: DataType): Boolean = { + protected def checkResult(result: Any, expected: Any, exprDataType: DataType): Boolean = { + val dataType = UserDefinedType.sqlType(exprDataType) + (result, expected) match { case (result: Array[Byte], expected: Array[Byte]) => java.util.Arrays.equals(result, expected) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ObjectExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ObjectExpressionsSuite.scala index b1bc67dfac1b5..b0188b0098def 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ObjectExpressionsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ObjectExpressionsSuite.scala @@ -21,13 +21,14 @@ import java.sql.{Date, Timestamp} import scala.collection.JavaConverters._ import scala.reflect.ClassTag +import scala.util.Random import org.apache.spark.{SparkConf, SparkFunSuite} import org.apache.spark.serializer.{JavaSerializer, KryoSerializer} -import org.apache.spark.sql.Row +import org.apache.spark.sql.{RandomDataGenerator, Row} import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.analysis.ResolveTimeZone -import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder +import org.apache.spark.sql.catalyst.encoders.{ExamplePointUDT, ExpressionEncoder, RowEncoder} import org.apache.spark.sql.catalyst.expressions.codegen.GenerateUnsafeProjection import org.apache.spark.sql.catalyst.expressions.objects._ import org.apache.spark.sql.catalyst.util._ @@ -381,6 +382,39 @@ class ObjectExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { checkEvaluation(decodeUsingSerializer, null, InternalRow.fromSeq(Seq(null))) } } + + test("LambdaVariable should support interpreted execution") { + def genSchema(dt: DataType): Seq[StructType] = { + Seq(StructType(StructField("col_1", dt, nullable = false) :: Nil), + StructType(StructField("col_1", dt, nullable = true) :: Nil)) + } + + val elementTypes = Seq(BooleanType, ByteType, ShortType, IntegerType, LongType, FloatType, + DoubleType, DecimalType.USER_DEFAULT, StringType, BinaryType, DateType, TimestampType, + CalendarIntervalType, new ExamplePointUDT()) + val arrayTypes = elementTypes.flatMap { elementType => + Seq(ArrayType(elementType, containsNull = false), 
ArrayType(elementType, containsNull = true)) + } + val mapTypes = elementTypes.flatMap { elementType => + Seq(MapType(elementType, elementType, false), MapType(elementType, elementType, true)) + } + val structTypes = elementTypes.flatMap { elementType => + Seq(StructType(StructField("col1", elementType, false) :: Nil), + StructType(StructField("col1", elementType, true) :: Nil)) + } + + val testTypes = elementTypes ++ arrayTypes ++ mapTypes ++ structTypes + val random = new Random(100) + testTypes.foreach { dt => + genSchema(dt).map { schema => + val row = RandomDataGenerator.randomRow(random, schema) + val rowConverter = RowEncoder(schema) + val internalRow = rowConverter.toRow(row) + val lambda = LambdaVariable("dummy", "dummuIsNull", schema(0).dataType, schema(0).nullable) + checkEvaluationWithoutCodegen(lambda, internalRow.get(0, schema(0).dataType), internalRow) + } + } + } } class TestBean extends Serializable { diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/ExprValueSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/ExprValueSuite.scala index c8f4cff7db48d..378b8bc055e34 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/ExprValueSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/ExprValueSuite.scala @@ -18,6 +18,7 @@ package org.apache.spark.sql.catalyst.expressions.codegen import org.apache.spark.SparkFunSuite +import org.apache.spark.sql.types.BooleanType class ExprValueSuite extends SparkFunSuite { @@ -31,16 +32,7 @@ class ExprValueSuite extends SparkFunSuite { assert(trueLit.isPrimitive) assert(falseLit.isPrimitive) - trueLit match { - case LiteralValue(value, javaType) => - assert(value == "true" && javaType == "boolean") - case _ => fail() - } - - falseLit match { - case LiteralValue(value, javaType) => - assert(value == "false" && javaType == "boolean") - case _ => fail() - } + assert(trueLit === JavaCode.literal("true", BooleanType)) + assert(falseLit === JavaCode.literal("false", BooleanType)) } } diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/submit/steps/DriverMountSecretsStep.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/UTF8StringBuilderSuite.scala similarity index 51% rename from resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/submit/steps/DriverMountSecretsStep.scala rename to sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/UTF8StringBuilderSuite.scala index 91e9a9f211335..1b25a4b191f86 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/submit/steps/DriverMountSecretsStep.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/UTF8StringBuilderSuite.scala @@ -1,38 +1,42 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.spark.deploy.k8s.submit.steps - -import org.apache.spark.deploy.k8s.MountSecretsBootstrap -import org.apache.spark.deploy.k8s.submit.KubernetesDriverSpec - -/** - * A driver configuration step for mounting user-specified secrets onto user-specified paths. - * - * @param bootstrap a utility actually handling mounting of the secrets. - */ -private[spark] class DriverMountSecretsStep( - bootstrap: MountSecretsBootstrap) extends DriverConfigurationStep { - - override def configureDriver(driverSpec: KubernetesDriverSpec): KubernetesDriverSpec = { - val pod = bootstrap.addSecretVolumes(driverSpec.driverPod) - val container = bootstrap.mountSecrets(driverSpec.driverContainer) - driverSpec.copy( - driverPod = pod, - driverContainer = container - ) - } -} +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.catalyst.expressions.codegen + +import org.apache.spark.SparkFunSuite +import org.apache.spark.unsafe.types.UTF8String + +class UTF8StringBuilderSuite extends SparkFunSuite { + + test("basic test") { + val sb = new UTF8StringBuilder() + assert(sb.build() === UTF8String.EMPTY_UTF8) + + sb.append("") + assert(sb.build() === UTF8String.EMPTY_UTF8) + + sb.append("abcd") + assert(sb.build() === UTF8String.fromString("abcd")) + + sb.append(UTF8String.fromString("1234")) + assert(sb.build() === UTF8String.fromString("abcd1234")) + + // expect to grow an internal buffer + sb.append(UTF8String.fromString("efgijk567890")) + assert(sb.build() === UTF8String.fromString("abcd1234efgijk567890")) + } +} diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/RemoveRedundantSortsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/RemoveRedundantSortsSuite.scala new file mode 100644 index 0000000000000..2319ab8046e56 --- /dev/null +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/RemoveRedundantSortsSuite.scala @@ -0,0 +1,101 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.catalyst.optimizer + +import org.apache.spark.sql.catalyst.analysis.{Analyzer, EmptyFunctionRegistry} +import org.apache.spark.sql.catalyst.catalog.{InMemoryCatalog, SessionCatalog} +import org.apache.spark.sql.catalyst.dsl.expressions._ +import org.apache.spark.sql.catalyst.dsl.plans._ +import org.apache.spark.sql.catalyst.expressions._ +import org.apache.spark.sql.catalyst.plans._ +import org.apache.spark.sql.catalyst.plans.logical._ +import org.apache.spark.sql.catalyst.rules._ +import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.internal.SQLConf.{CASE_SENSITIVE, ORDER_BY_ORDINAL} + +class RemoveRedundantSortsSuite extends PlanTest { + + object Optimize extends RuleExecutor[LogicalPlan] { + val batches = + Batch("Remove Redundant Sorts", Once, + RemoveRedundantSorts) :: + Batch("Collapse Project", Once, + CollapseProject) :: Nil + } + + val testRelation = LocalRelation('a.int, 'b.int, 'c.int) + + test("remove redundant order by") { + val orderedPlan = testRelation.select('a, 'b).orderBy('a.asc, 'b.desc_nullsFirst) + val unnecessaryReordered = orderedPlan.select('a).orderBy('a.asc, 'b.desc_nullsFirst) + val optimized = Optimize.execute(unnecessaryReordered.analyze) + val correctAnswer = orderedPlan.select('a).analyze + comparePlans(Optimize.execute(optimized), correctAnswer) + } + + test("do not remove sort if the order is different") { + val orderedPlan = testRelation.select('a, 'b).orderBy('a.asc, 'b.desc_nullsFirst) + val reorderedDifferently = orderedPlan.select('a).orderBy('a.asc, 'b.desc) + val optimized = Optimize.execute(reorderedDifferently.analyze) + val correctAnswer = reorderedDifferently.analyze + comparePlans(optimized, correctAnswer) + } + + test("filters don't affect order") { + val orderedPlan = testRelation.select('a, 'b).orderBy('a.asc, 'b.desc) + val filteredAndReordered = orderedPlan.where('a > Literal(10)).orderBy('a.asc, 'b.desc) + val optimized = Optimize.execute(filteredAndReordered.analyze) + val correctAnswer = orderedPlan.where('a > Literal(10)).analyze + comparePlans(optimized, correctAnswer) + } + + test("limits don't affect order") { + val orderedPlan = testRelation.select('a, 'b).orderBy('a.asc, 'b.desc) + val filteredAndReordered = orderedPlan.limit(Literal(10)).orderBy('a.asc, 'b.desc) + val optimized = Optimize.execute(filteredAndReordered.analyze) + val correctAnswer = orderedPlan.limit(Literal(10)).analyze + comparePlans(optimized, correctAnswer) + } + + test("range is already sorted") { + val inputPlan = Range(1L, 1000L, 1, 10) + val orderedPlan = inputPlan.orderBy('id.asc) + val optimized = Optimize.execute(orderedPlan.analyze) + val correctAnswer = inputPlan.analyze + comparePlans(optimized, correctAnswer) + + val reversedPlan = inputPlan.orderBy('id.desc) + val reversedOptimized = Optimize.execute(reversedPlan.analyze) + val reversedCorrectAnswer = reversedPlan.analyze + comparePlans(reversedOptimized, reversedCorrectAnswer) + + val negativeStepInputPlan = Range(10L, 1L, -1, 10) + val negativeStepOrderedPlan = negativeStepInputPlan.orderBy('id.desc) + val negativeStepOptimized 
= Optimize.execute(negativeStepOrderedPlan.analyze) + val negativeStepCorrectAnswer = negativeStepInputPlan.analyze + comparePlans(negativeStepOptimized, negativeStepCorrectAnswer) + } + + test("sort should not be removed when there is a node which doesn't guarantee any order") { + val orderedPlan = testRelation.select('a, 'b).orderBy('a.asc) + val groupedAndResorted = orderedPlan.groupBy('a)(sum('a)).orderBy('a.asc) + val optimized = Optimize.execute(groupedAndResorted.analyze) + val correctAnswer = groupedAndResorted.analyze + comparePlans(optimized, correctAnswer) + } +} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala index 0aee1d7be5788..917168162b236 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala @@ -3189,10 +3189,10 @@ class Dataset[T] private[sql]( private[sql] def collectToPython(): Int = { EvaluatePython.registerPicklers() - withNewExecutionId { + withAction("collectToPython", queryExecution) { plan => val toJava: (Any) => Any = EvaluatePython.toJava(_, schema) - val iter = new SerDeUtil.AutoBatchedPickler( - queryExecution.executedPlan.executeCollect().iterator.map(toJava)) + val iter: Iterator[Array[Byte]] = new SerDeUtil.AutoBatchedPickler( + plan.executeCollect().iterator.map(toJava)) PythonRDD.serveIterator(iter, "serve-DataFrame") } } @@ -3201,8 +3201,9 @@ class Dataset[T] private[sql]( * Collect a Dataset as ArrowPayload byte arrays and serve to PySpark. */ private[sql] def collectAsArrowToPython(): Int = { - withNewExecutionId { - val iter = toArrowPayload.collect().iterator.map(_.asPythonSerializable) + withAction("collectAsArrowToPython", queryExecution) { plan => + val iter: Iterator[Array[Byte]] = + toArrowPayload(plan).collect().iterator.map(_.asPythonSerializable) PythonRDD.serveIterator(iter, "serve-Arrow") } } @@ -3311,14 +3312,19 @@ class Dataset[T] private[sql]( } /** Convert to an RDD of ArrowPayload byte arrays */ - private[sql] def toArrowPayload: RDD[ArrowPayload] = { + private[sql] def toArrowPayload(plan: SparkPlan): RDD[ArrowPayload] = { val schemaCaptured = this.schema val maxRecordsPerBatch = sparkSession.sessionState.conf.arrowMaxRecordsPerBatch val timeZoneId = sparkSession.sessionState.conf.sessionLocalTimeZone - queryExecution.toRdd.mapPartitionsInternal { iter => + plan.execute().mapPartitionsInternal { iter => val context = TaskContext.get() ArrowConverters.toPayloadIterator( iter, schemaCaptured, maxRecordsPerBatch, timeZoneId, context) } } + + // This is only used in tests, for now. 
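The RemoveRedundantSortsSuite above probes one invariant: a Sort may be dropped only when the child's guaranteed output ordering already satisfies the requested one (filters and limits preserve ordering, aggregates do not). A hedged sketch of that satisfaction check on a simplified model; the real rule compares Catalyst SortOrder expressions, not this toy case class:

case class Ordering(column: String, ascending: Boolean, nullsFirst: Boolean)

// A requested sort is redundant iff it is a prefix of what the child already
// guarantees, matched field by field including direction and null ordering.
def sortIsRedundant(requested: Seq[Ordering], childOrdering: Seq[Ordering]): Boolean = {
  requested.nonEmpty &&
    requested.length <= childOrdering.length &&
    requested.zip(childOrdering).forall { case (want, have) => want == have }
}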
+ private[sql] def toArrowPayload: RDD[ArrowPayload] = { + toArrowPayload(queryExecution.executedPlan) + } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala b/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala index b107492fbb330..c502e583a55c5 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala @@ -44,7 +44,7 @@ import org.apache.spark.sql.sources.BaseRelation import org.apache.spark.sql.streaming._ import org.apache.spark.sql.types.{DataType, StructType} import org.apache.spark.sql.util.ExecutionListenerManager -import org.apache.spark.util.Utils +import org.apache.spark.util.{CallSite, Utils} /** @@ -81,6 +81,9 @@ class SparkSession private( @transient private[sql] val extensions: SparkSessionExtensions) extends Serializable with Closeable with Logging { self => + // The call site where this SparkSession was constructed. + private val creationSite: CallSite = Utils.getCallSite() + private[sql] def this(sc: SparkContext) { this(sc, None, None, new SparkSessionExtensions) } @@ -763,7 +766,7 @@ class SparkSession private( @InterfaceStability.Stable -object SparkSession { +object SparkSession extends Logging { /** * Builder for [[SparkSession]]. @@ -1090,4 +1093,20 @@ object SparkSession { } } + private[spark] def cleanupAnyExistingSession(): Unit = { + val session = getActiveSession.orElse(getDefaultSession) + if (session.isDefined) { + logWarning( + s"""An existing Spark session exists as the active or default session. + |This probably means another suite leaked it. Attempting to stop it before continuing. + |This existing Spark session was created at: + | + |${session.get.creationSite.longForm} + | + """.stripMargin) + session.get.stop() + SparkSession.clearActiveSession() + SparkSession.clearDefaultSession() + } + } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/CacheManager.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/CacheManager.scala index d68aeb275afda..a8794be7280c7 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/CacheManager.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/CacheManager.scala @@ -99,7 +99,7 @@ class CacheManager extends Logging { sparkSession.sessionState.conf.columnBatchSize, storageLevel, sparkSession.sessionState.executePlan(planToCache).executedPlan, tableName, - planToCache.stats) + planToCache) cachedData.add(CachedData(planToCache, inMemoryRelation)) } } @@ -148,7 +148,7 @@ class CacheManager extends Logging { storageLevel = cd.cachedRepresentation.storageLevel, child = spark.sessionState.executePlan(cd.plan).executedPlan, tableName = cd.cachedRepresentation.tableName, - statsOfPlanToCache = cd.plan.stats) + logicalPlan = cd.plan) needToRecache += cd.copy(cachedRepresentation = newCache) } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/ColumnarBatchScan.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/ColumnarBatchScan.scala index 434214a10e1e3..fc3dbc1c5591b 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/ColumnarBatchScan.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/ColumnarBatchScan.scala @@ -18,7 +18,7 @@ package org.apache.spark.sql.execution import org.apache.spark.sql.catalyst.expressions.{BoundReference, UnsafeRow} -import org.apache.spark.sql.catalyst.expressions.codegen.{CodegenContext, CodeGenerator, ExprCode, FalseLiteral, VariableValue} +import 
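The new SparkSession.creationSite field and cleanupAnyExistingSession above implement a leak-reporting pattern: capture the creation call site eagerly at construction time so a later cleanup can say who created the leaked instance. CallSite and Utils.getCallSite are Spark internals; this stand-in just snapshots the stack trace:

// Sketch only: snapshot a few caller frames at construction so that a leak
// warning can point back at the construction site.
class TrackedResource {
  private val creationSite: String =
    Thread.currentThread().getStackTrace.drop(2).take(5).mkString("\n  ")

  def leakWarning: String =
    s"An existing instance was leaked. It was created at:\n  $creationSite"
}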
org.apache.spark.sql.catalyst.expressions.codegen._ import org.apache.spark.sql.execution.metric.SQLMetrics import org.apache.spark.sql.types.DataType import org.apache.spark.sql.vectorized.{ColumnarBatch, ColumnVector} @@ -52,7 +52,7 @@ private[sql] trait ColumnarBatchScan extends CodegenSupport { val javaType = CodeGenerator.javaType(dataType) val value = CodeGenerator.getValueFromVector(columnVar, dataType, ordinal) val isNullVar = if (nullable) { - VariableValue(ctx.freshName("isNull"), CodeGenerator.JAVA_BOOLEAN) + JavaCode.isNullVariable(ctx.freshName("isNull")) } else { FalseLiteral } @@ -66,7 +66,7 @@ private[sql] trait ColumnarBatchScan extends CodegenSupport { } else { s"$javaType $valueVar = $value;" }).trim - ExprCode(code, isNullVar, VariableValue(valueVar, javaType)) + ExprCode(code, isNullVar, JavaCode.variable(valueVar, dataType)) } /** diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/ExistingRDD.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/ExistingRDD.scala index f3555508185fe..be50a1571a2ff 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/ExistingRDD.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/ExistingRDD.scala @@ -125,7 +125,7 @@ case class LogicalRDD( output: Seq[Attribute], rdd: RDD[InternalRow], outputPartitioning: Partitioning = UnknownPartitioning(0), - outputOrdering: Seq[SortOrder] = Nil, + override val outputOrdering: Seq[SortOrder] = Nil, override val isStreaming: Boolean = false)(session: SparkSession) extends LeafNode with MultiInstanceRelation { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/ExpandExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/ExpandExec.scala index 0d9a62cace62a..e4812f3d338fb 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/ExpandExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/ExpandExec.scala @@ -21,7 +21,7 @@ import org.apache.spark.rdd.RDD import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.errors._ import org.apache.spark.sql.catalyst.expressions._ -import org.apache.spark.sql.catalyst.expressions.codegen.{CodegenContext, CodeGenerator, ExprCode, VariableValue} +import org.apache.spark.sql.catalyst.expressions.codegen._ import org.apache.spark.sql.catalyst.plans.physical.{Partitioning, UnknownPartitioning} import org.apache.spark.sql.execution.metric.SQLMetrics @@ -157,8 +157,10 @@ case class ExpandExec( |${CodeGenerator.javaType(firstExpr.dataType)} $value = | ${CodeGenerator.defaultValue(firstExpr.dataType)}; """.stripMargin - ExprCode(code, VariableValue(isNull, CodeGenerator.JAVA_BOOLEAN), - VariableValue(value, CodeGenerator.javaType(firstExpr.dataType))) + ExprCode( + code, + JavaCode.isNullVariable(isNull), + JavaCode.variable(value, firstExpr.dataType)) } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/GenerateExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/GenerateExec.scala index 85c5ebfdaa689..f40c50df74ccb 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/GenerateExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/GenerateExec.scala @@ -23,7 +23,7 @@ import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.expressions.codegen._ import org.apache.spark.sql.catalyst.plans.physical.Partitioning import org.apache.spark.sql.execution.metric.SQLMetrics -import org.apache.spark.sql.types.{ArrayType, DataType, MapType, 
StructType} +import org.apache.spark.sql.types._ /** * For lazy computing, be sure the generator.terminate() called in the very last @@ -170,10 +170,11 @@ case class GenerateExec( // Add position val position = if (e.position) { if (outer) { - Seq(ExprCode("", StatementValue(s"$index == -1", CodeGenerator.JAVA_BOOLEAN), - VariableValue(index, CodeGenerator.JAVA_INT))) + Seq(ExprCode( + JavaCode.isNullExpression(s"$index == -1"), + JavaCode.variable(index, IntegerType))) } else { - Seq(ExprCode("", FalseLiteral, VariableValue(index, CodeGenerator.JAVA_INT))) + Seq(ExprCode(FalseLiteral, JavaCode.variable(index, IntegerType))) } } else { Seq.empty @@ -316,11 +317,9 @@ case class GenerateExec( |boolean $isNull = ${checks.mkString(" || ")}; |$javaType $value = $isNull ? ${CodeGenerator.defaultValue(dt)} : $getter; """.stripMargin - ExprCode(code, VariableValue(isNull, CodeGenerator.JAVA_BOOLEAN), - VariableValue(value, javaType)) + ExprCode(code, JavaCode.isNullVariable(isNull), JavaCode.variable(value, dt)) } else { - ExprCode(s"$javaType $value = $getter;", FalseLiteral, - VariableValue(value, javaType)) + ExprCode(s"$javaType $value = $getter;", FalseLiteral, JavaCode.variable(value, dt)) } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/WholeStageCodegenExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/WholeStageCodegenExec.scala index 805ff3cf001ba..828b51fa199de 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/WholeStageCodegenExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/WholeStageCodegenExec.scala @@ -111,7 +111,7 @@ trait CodegenSupport extends SparkPlan { private def prepareRowVar(ctx: CodegenContext, row: String, colVars: Seq[ExprCode]): ExprCode = { if (row != null) { - ExprCode("", FalseLiteral, VariableValue(row, "UnsafeRow")) + ExprCode.forNonNullValue(JavaCode.variable(row, classOf[UnsafeRow])) } else { if (colVars.nonEmpty) { val colExprs = output.zipWithIndex.map { case (attr, i) => @@ -128,8 +128,8 @@ trait CodegenSupport extends SparkPlan { """.stripMargin.trim ExprCode(code, FalseLiteral, ev.value) } else { - // There is no columns - ExprCode("", FalseLiteral, VariableValue("unsafeRow", "UnsafeRow")) + // There are no columns + ExprCode.forNonNullValue(JavaCode.variable("unsafeRow", classOf[UnsafeRow])) } } } @@ -246,11 +246,10 @@ trait CodegenSupport extends SparkPlan { val isNull = ctx.freshName(s"exprIsNull_$i") arguments += ev.isNull parameters += s"boolean $isNull" - VariableValue(isNull, CodeGenerator.JAVA_BOOLEAN) + JavaCode.isNullVariable(isNull) } - paramVars += ExprCode("", paramIsNull, - VariableValue(paramName, CodeGenerator.javaType(attributes(i).dataType))) + paramVars += ExprCode(paramIsNull, JavaCode.variable(paramName, attributes(i).dataType)) } (arguments, parameters, paramVars) } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/HashAggregateExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/HashAggregateExec.scala index 8f7f10243d4cc..a5dc6ebf2b0f2 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/HashAggregateExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/HashAggregateExec.scala @@ -194,8 +194,10 @@ case class HashAggregateExec( | $isNull = ${ev.isNull}; | $value = ${ev.value}; """.stripMargin - ExprCode(ev.code + initVars, GlobalValue(isNull, CodeGenerator.JAVA_BOOLEAN), - GlobalValue(value, CodeGenerator.javaType(e.dataType))) + ExprCode( + ev.code + 
initVars, + JavaCode.isNullGlobal(isNull), + JavaCode.global(value, e.dataType)) } val initBufVar = evaluateVariables(bufVars) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/HashMapGenerator.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/HashMapGenerator.scala index 4978954271311..de2d630de3fdb 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/HashMapGenerator.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/HashMapGenerator.scala @@ -18,7 +18,7 @@ package org.apache.spark.sql.execution.aggregate import org.apache.spark.sql.catalyst.expressions.aggregate.{AggregateExpression, DeclarativeAggregate} -import org.apache.spark.sql.catalyst.expressions.codegen.{CodegenContext, CodeGenerator, ExprCode, GlobalValue} +import org.apache.spark.sql.catalyst.expressions.codegen._ import org.apache.spark.sql.types._ /** @@ -54,8 +54,10 @@ abstract class HashMapGenerator( | $isNull = ${ev.isNull}; | $value = ${ev.value}; """.stripMargin - ExprCode(ev.code + initVars, GlobalValue(isNull, CodeGenerator.JAVA_BOOLEAN), - GlobalValue(value, CodeGenerator.javaType(e.dataType))) + ExprCode( + ev.code + initVars, + JavaCode.isNullGlobal(isNull), + JavaCode.global(value, e.dataType)) } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/basicPhysicalOperators.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/basicPhysicalOperators.scala index cab7081400ce9..1edfdc888afd8 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/basicPhysicalOperators.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/basicPhysicalOperators.scala @@ -368,7 +368,7 @@ case class RangeExec(range: org.apache.spark.sql.catalyst.plans.logical.Range) val number = ctx.addMutableState(CodeGenerator.JAVA_LONG, "number") val value = ctx.freshName("value") - val ev = ExprCode("", FalseLiteral, VariableValue(value, CodeGenerator.JAVA_LONG)) + val ev = ExprCode.forNonNullValue(JavaCode.variable(value, LongType)) val BigInt = classOf[java.math.BigInteger].getName // Inline mutable state since not many Range operations in a task diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/InMemoryRelation.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/InMemoryRelation.scala index 2579046e30708..a7ba9b86a176f 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/InMemoryRelation.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/InMemoryRelation.scala @@ -26,7 +26,7 @@ import org.apache.spark.sql.catalyst.analysis.MultiInstanceRelation import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.plans.QueryPlan import org.apache.spark.sql.catalyst.plans.logical -import org.apache.spark.sql.catalyst.plans.logical.{HintInfo, Statistics} +import org.apache.spark.sql.catalyst.plans.logical.{HintInfo, LogicalPlan, Statistics} import org.apache.spark.sql.execution.SparkPlan import org.apache.spark.storage.StorageLevel import org.apache.spark.util.LongAccumulator @@ -39,9 +39,9 @@ object InMemoryRelation { storageLevel: StorageLevel, child: SparkPlan, tableName: Option[String], - statsOfPlanToCache: Statistics): InMemoryRelation = + logicalPlan: LogicalPlan): InMemoryRelation = new InMemoryRelation(child.output, useCompression, batchSize, storageLevel, child, tableName)( - statsOfPlanToCache = statsOfPlanToCache) + statsOfPlanToCache = logicalPlan.stats, outputOrdering = 
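Across the ColumnarBatchScan, ExpandExec, GenerateExec, HashAggregateExec, and HashMapGenerator hunks, the refactor swaps raw (name, Java-type-string) ExprValue constructors for typed JavaCode factories, so the Java-level type is derived from the Catalyst DataType in one place. A hedged sketch of the idea, with made-up Sketch* names and an abbreviated type mapping (the real CodeGenerator.javaType covers all Catalyst types):

sealed trait SketchExprValue { def name: String; def javaType: String }
final case class SketchVariable(name: String, javaType: String) extends SketchExprValue
final case class SketchGlobal(name: String, javaType: String) extends SketchExprValue

object SketchJavaCode {
  private def javaType(catalystType: String): String = catalystType match {
    case "BooleanType" => "boolean"
    case "IntegerType" => "int"
    case "LongType" => "long"
    case _ => "Object"
  }

  def variable(name: String, catalystType: String): SketchVariable =
    SketchVariable(name, javaType(catalystType))

  // isNull flags are always Java booleans, so no DataType argument is needed.
  def isNullVariable(name: String): SketchVariable = SketchVariable(name, "boolean")

  def global(name: String, catalystType: String): SketchGlobal =
    SketchGlobal(name, javaType(catalystType))
}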
logicalPlan.outputOrdering) } @@ -64,7 +64,8 @@ case class InMemoryRelation( tableName: Option[String])( @transient var _cachedColumnBuffers: RDD[CachedBatch] = null, val sizeInBytesStats: LongAccumulator = child.sqlContext.sparkContext.longAccumulator, - statsOfPlanToCache: Statistics) + statsOfPlanToCache: Statistics, + override val outputOrdering: Seq[SortOrder]) extends logical.LeafNode with MultiInstanceRelation { override protected def innerChildren: Seq[SparkPlan] = Seq(child) @@ -76,7 +77,8 @@ case class InMemoryRelation( tableName = None)( _cachedColumnBuffers, sizeInBytesStats, - statsOfPlanToCache) + statsOfPlanToCache, + outputOrdering) override def producedAttributes: AttributeSet = outputSet @@ -159,7 +161,7 @@ case class InMemoryRelation( def withOutput(newOutput: Seq[Attribute]): InMemoryRelation = { InMemoryRelation( newOutput, useCompression, batchSize, storageLevel, child, tableName)( - _cachedColumnBuffers, sizeInBytesStats, statsOfPlanToCache) + _cachedColumnBuffers, sizeInBytesStats, statsOfPlanToCache, outputOrdering) } override def newInstance(): this.type = { @@ -172,7 +174,8 @@ case class InMemoryRelation( tableName)( _cachedColumnBuffers, sizeInBytesStats, - statsOfPlanToCache).asInstanceOf[this.type] + statsOfPlanToCache, + outputOrdering).asInstanceOf[this.type] } def cachedColumnBuffers: RDD[CachedBatch] = _cachedColumnBuffers diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/createDataSourceTables.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/createDataSourceTables.scala index f7c3e9b019258..f6ef433f2ce15 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/createDataSourceTables.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/createDataSourceTables.scala @@ -182,7 +182,7 @@ case class CreateDataSourceTableAsSelectCommand( // provider (for example, see org.apache.spark.sql.parquet.DefaultSource). schema = result.schema) // Table location is already validated. No need to check it again during table creation. 
- sessionState.catalog.createTable(newTable, ignoreIfExists = true) + sessionState.catalog.createTable(newTable, ignoreIfExists = false, validateLocation = false) result match { case fs: HadoopFsRelation if table.partitionColumnNames.nonEmpty && diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/CatalogFileIndex.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/CatalogFileIndex.scala index 4046396d0e614..a66a07673e25f 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/CatalogFileIndex.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/CatalogFileIndex.scala @@ -85,7 +85,7 @@ class CatalogFileIndex( sparkSession, new Path(baseLocation.get), fileStatusCache, partitionSpec, Option(timeNs)) } else { new InMemoryFileIndex( - sparkSession, rootPaths, table.storage.properties, partitionSchema = None) + sparkSession, rootPaths, table.storage.properties, userSpecifiedSchema = None) } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSource.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSource.scala index b84ea769808f9..f16d824201e77 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSource.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSource.scala @@ -23,7 +23,6 @@ import scala.collection.JavaConverters._ import scala.language.{existentials, implicitConversions} import scala.util.{Failure, Success, Try} -import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.Path import org.apache.spark.deploy.SparkHadoopUtil @@ -103,24 +102,6 @@ case class DataSource( bucket.sortColumnNames, "in the sort definition", equality) } - /** - * In the read path, only managed tables by Hive provide the partition columns properly when - * initializing this class. All other file based data sources will try to infer the partitioning, - * and then cast the inferred types to user specified dataTypes if the partition columns exist - * inside `userSpecifiedSchema`, otherwise we can hit data corruption bugs like SPARK-18510, or - * inconsistent data types as reported in SPARK-21463. - * @param fileIndex A FileIndex that will perform partition inference - * @return The PartitionSchema resolved from inference and cast according to `userSpecifiedSchema` - */ - private def combineInferredAndUserSpecifiedPartitionSchema(fileIndex: FileIndex): StructType = { - val resolved = fileIndex.partitionSchema.map { partitionField => - // SPARK-18510: try to get schema from userSpecifiedSchema, otherwise fallback to inferred - userSpecifiedSchema.flatMap(_.find(f => equality(f.name, partitionField.name))).getOrElse( - partitionField) - } - StructType(resolved) - } - /** * Get the schema of the given FileFormat, if provided by `userSpecifiedSchema`, or try to infer * it. In the read path, only managed tables by Hive provide the partition columns properly when @@ -140,31 +121,26 @@ case class DataSource( * be any further inference in any triggers. * * @param format the file format object for this DataSource - * @param fileStatusCache the shared cache for file statuses to speed up listing + * @param fileIndex optional [[InMemoryFileIndex]] for getting partition schema and file list * @return A pair of the data schema (excluding partition columns) and the schema of the partition * columns. 
*/ private def getOrInferFileFormatSchema( format: FileFormat, - fileStatusCache: FileStatusCache = NoopCache): (StructType, StructType) = { - // the operations below are expensive therefore try not to do them if we don't need to, e.g., + fileIndex: Option[InMemoryFileIndex] = None): (StructType, StructType) = { + // The operations below are expensive therefore try not to do them if we don't need to, e.g., // in streaming mode, we have already inferred and registered partition columns, we will // never have to materialize the lazy val below - lazy val tempFileIndex = { - val allPaths = caseInsensitiveOptions.get("path") ++ paths - val hadoopConf = sparkSession.sessionState.newHadoopConf() - val globbedPaths = allPaths.toSeq.flatMap { path => - val hdfsPath = new Path(path) - val fs = hdfsPath.getFileSystem(hadoopConf) - val qualified = hdfsPath.makeQualified(fs.getUri, fs.getWorkingDirectory) - SparkHadoopUtil.get.globPathIfNecessary(fs, qualified) - }.toArray - new InMemoryFileIndex(sparkSession, globbedPaths, options, None, fileStatusCache) + lazy val tempFileIndex = fileIndex.getOrElse { + val globbedPaths = + checkAndGlobPathIfNecessary(checkEmptyGlobPath = false, checkFilesExist = false) + createInMemoryFileIndex(globbedPaths) } + val partitionSchema = if (partitionColumns.isEmpty) { // Try to infer partitioning, because no DataSource in the read path provides the partitioning // columns properly unless it is a Hive DataSource - combineInferredAndUserSpecifiedPartitionSchema(tempFileIndex) + tempFileIndex.partitionSchema } else { // maintain old behavior before SPARK-18510. If userSpecifiedSchema is empty used inferred // partitioning @@ -356,13 +332,7 @@ case class DataSource( caseInsensitiveOptions.get("path").toSeq ++ paths, sparkSession.sessionState.newHadoopConf()) => val basePath = new Path((caseInsensitiveOptions.get("path").toSeq ++ paths).head) - val tempFileCatalog = new MetadataLogFileIndex(sparkSession, basePath, None) - val fileCatalog = if (userSpecifiedSchema.nonEmpty) { - val partitionSchema = combineInferredAndUserSpecifiedPartitionSchema(tempFileCatalog) - new MetadataLogFileIndex(sparkSession, basePath, Option(partitionSchema)) - } else { - tempFileCatalog - } + val fileCatalog = new MetadataLogFileIndex(sparkSession, basePath, userSpecifiedSchema) val dataSchema = userSpecifiedSchema.orElse { format.inferSchema( sparkSession, @@ -384,24 +354,23 @@ case class DataSource( // This is a non-streaming file based datasource. 
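The rewritten hunk below makes the schema-resolution decision explicit: reuse the catalog's data and partition schemas when the catalog actually manages this table's partitions, and fall back to listing files and inferring otherwise. A hedged sketch of that predicate, where TableMeta is a made-up stand-in for the relevant CatalogTable fields:

final case class TableMeta(
    tracksPartitionsInCatalog: Boolean,
    partitionColumnNames: Seq[String])

// True only when partition management is on, the catalog tracks this table's
// partitions, and the table really has partition columns to track.
def shouldUseCatalogFileIndex(
    manageFilesourcePartitions: Boolean,
    table: Option[TableMeta]): Boolean = {
  manageFilesourcePartitions &&
    table.exists(t => t.tracksPartitionsInCatalog && t.partitionColumnNames.nonEmpty)
}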
case (format: FileFormat, _) => - val allPaths = caseInsensitiveOptions.get("path") ++ paths - val hadoopConf = sparkSession.sessionState.newHadoopConf() - val globbedPaths = allPaths.flatMap( - DataSource.checkAndGlobPathIfNecessary(hadoopConf, _, checkFilesExist)).toArray - - val fileStatusCache = FileStatusCache.getOrCreate(sparkSession) - val (dataSchema, partitionSchema) = getOrInferFileFormatSchema(format, fileStatusCache) - - val fileCatalog = if (sparkSession.sqlContext.conf.manageFilesourcePartitions && - catalogTable.isDefined && catalogTable.get.tracksPartitionsInCatalog) { + val globbedPaths = + checkAndGlobPathIfNecessary(checkEmptyGlobPath = true, checkFilesExist = checkFilesExist) + val useCatalogFileIndex = sparkSession.sqlContext.conf.manageFilesourcePartitions && + catalogTable.isDefined && catalogTable.get.tracksPartitionsInCatalog && + catalogTable.get.partitionColumnNames.nonEmpty + val (fileCatalog, dataSchema, partitionSchema) = if (useCatalogFileIndex) { val defaultTableSize = sparkSession.sessionState.conf.defaultSizeInBytes - new CatalogFileIndex( + val index = new CatalogFileIndex( sparkSession, catalogTable.get, catalogTable.get.stats.map(_.sizeInBytes.toLong).getOrElse(defaultTableSize)) + (index, catalogTable.get.dataSchema, catalogTable.get.partitionSchema) } else { - new InMemoryFileIndex( - sparkSession, globbedPaths, options, Some(partitionSchema), fileStatusCache) + val index = createInMemoryFileIndex(globbedPaths) + val (resultDataSchema, resultPartitionSchema) = + getOrInferFileFormatSchema(format, Some(index)) + (index, resultDataSchema, resultPartitionSchema) } HadoopFsRelation( @@ -552,6 +521,40 @@ case class DataSource( sys.error(s"${providingClass.getCanonicalName} does not allow create table as select.") } } + + /** Returns an [[InMemoryFileIndex]] that can be used to get partition schema and file list. */ + private def createInMemoryFileIndex(globbedPaths: Seq[Path]): InMemoryFileIndex = { + val fileStatusCache = FileStatusCache.getOrCreate(sparkSession) + new InMemoryFileIndex( + sparkSession, globbedPaths, options, userSpecifiedSchema, fileStatusCache) + } + + /** + * Checks and returns files in all the paths. + */ + private def checkAndGlobPathIfNecessary( + checkEmptyGlobPath: Boolean, + checkFilesExist: Boolean): Seq[Path] = { + val allPaths = caseInsensitiveOptions.get("path") ++ paths + val hadoopConf = sparkSession.sessionState.newHadoopConf() + allPaths.flatMap { path => + val hdfsPath = new Path(path) + val fs = hdfsPath.getFileSystem(hadoopConf) + val qualified = hdfsPath.makeQualified(fs.getUri, fs.getWorkingDirectory) + val globPath = SparkHadoopUtil.get.globPathIfNecessary(fs, qualified) + + if (checkEmptyGlobPath && globPath.isEmpty) { + throw new AnalysisException(s"Path does not exist: $qualified") + } + + // Sufficient to check head of the globPath seq for non-glob scenario + // Don't need to check once again if files exist in streaming mode + if (checkFilesExist && !fs.exists(globPath.head)) { + throw new AnalysisException(s"Path does not exist: ${globPath.head}") + } + globPath + }.toSeq + } } object DataSource extends Logging { @@ -699,30 +702,6 @@ object DataSource extends Logging { locationUri = path.map(CatalogUtils.stringToURI), properties = optionsWithoutPath) } - /** - * If `path` is a file pattern, return all the files that match it. Otherwise, return itself. - * If `checkFilesExist` is `true`, also check the file existence. 
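The new checkAndGlobPathIfNecessary above centralizes a glob-then-verify step. A simplified sketch of that behaviour, using FileSystem.globStatus directly (which may return null when nothing matches) and a generic error instead of Spark's AnalysisException:

import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.Path

def expandAndVerify(paths: Seq[String], hadoopConf: Configuration): Seq[Path] = {
  paths.flatMap { p =>
    val hdfsPath = new Path(p)
    val fs = hdfsPath.getFileSystem(hadoopConf)
    // Qualify relative paths against the file system before globbing.
    val qualified = hdfsPath.makeQualified(fs.getUri, fs.getWorkingDirectory)
    val matched = Option(fs.globStatus(qualified)).toSeq.flatten.map(_.getPath)
    if (matched.isEmpty) sys.error(s"Path does not exist: $qualified")
    matched
  }
}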
-   */
-  private def checkAndGlobPathIfNecessary(
-      hadoopConf: Configuration,
-      path: String,
-      checkFilesExist: Boolean): Seq[Path] = {
-    val hdfsPath = new Path(path)
-    val fs = hdfsPath.getFileSystem(hadoopConf)
-    val qualified = hdfsPath.makeQualified(fs.getUri, fs.getWorkingDirectory)
-    val globPath = SparkHadoopUtil.get.globPathIfNecessary(fs, qualified)
-
-    if (globPath.isEmpty) {
-      throw new AnalysisException(s"Path does not exist: $qualified")
-    }
-    // Sufficient to check head of the globPath seq for non-glob scenario
-    // Don't need to check once again if files exist in streaming mode
-    if (checkFilesExist && !fs.exists(globPath.head)) {
-      throw new AnalysisException(s"Path does not exist: ${globPath.head}")
-    }
-    globPath
-  }
-
   /**
    * Called before writing into a FileFormat based data source to make sure the
    * supplied schema is not empty.
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/InMemoryFileIndex.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/InMemoryFileIndex.scala
index 318ada0ceefc5..739d1f456e3ec 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/InMemoryFileIndex.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/InMemoryFileIndex.scala
@@ -41,17 +41,17 @@ import org.apache.spark.util.SerializableConfiguration
 * @param rootPathsSpecified the list of root table paths to scan (some of which might be
 *                           filtered out later)
 * @param parameters as set of options to control discovery
- * @param partitionSchema an optional partition schema that will be use to provide types for the
- *                        discovered partitions
+ * @param userSpecifiedSchema an optional user specified schema that will be used to provide
+ *                            types for the discovered partitions
 */
class InMemoryFileIndex(
    sparkSession: SparkSession,
    rootPathsSpecified: Seq[Path],
    parameters: Map[String, String],
-    partitionSchema: Option[StructType],
+    userSpecifiedSchema: Option[StructType],
    fileStatusCache: FileStatusCache = NoopCache)
  extends PartitioningAwareFileIndex(
-    sparkSession, parameters, partitionSchema, fileStatusCache) {
+    sparkSession, parameters, userSpecifiedSchema, fileStatusCache) {

 // Filter out streaming metadata dirs or files such as "/.../_spark_metadata" (the metadata dir)
 // or "/.../_spark_metadata/0" (a file in the metadata dir). `rootPathsSpecified` might contain
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningAwareFileIndex.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningAwareFileIndex.scala
index 6b6f6388d54e8..cc8af7b92c454 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningAwareFileIndex.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningAwareFileIndex.scala
@@ -34,13 +34,13 @@ import org.apache.spark.sql.types.{StringType, StructType}
 * It provides the necessary methods to parse partition data based on a set of files.
 *
 * @param parameters as set of options to control partition discovery
- * @param userPartitionSchema an optional partition schema that will be use to provide types for
- *                            the discovered partitions
+ * @param userSpecifiedSchema an optional user specified schema that will be used to provide
+ *                            types for the discovered partitions
 */
abstract class PartitioningAwareFileIndex(
    sparkSession: SparkSession,
    parameters: Map[String, String],
-    userPartitionSchema: Option[StructType],
+    userSpecifiedSchema: Option[StructType],
    fileStatusCache: FileStatusCache = NoopCache) extends FileIndex with Logging {

 import PartitioningAwareFileIndex.BASE_PATH_PARAM
@@ -126,35 +126,32 @@ abstract class PartitioningAwareFileIndex(
     val caseInsensitiveOptions = CaseInsensitiveMap(parameters)
     val timeZoneId = caseInsensitiveOptions.get(DateTimeUtils.TIMEZONE_OPTION)
       .getOrElse(sparkSession.sessionState.conf.sessionLocalTimeZone)
-
-    userPartitionSchema match {
+    val inferredPartitionSpec = PartitioningUtils.parsePartitions(
+      leafDirs,
+      typeInference = sparkSession.sessionState.conf.partitionColumnTypeInferenceEnabled,
+      basePaths = basePaths,
+      timeZoneId = timeZoneId)
+    userSpecifiedSchema match {
       case Some(userProvidedSchema) if userProvidedSchema.nonEmpty =>
-        val spec = PartitioningUtils.parsePartitions(
-          leafDirs,
-          typeInference = false,
-          basePaths = basePaths,
-          timeZoneId = timeZoneId)
+        val userPartitionSchema =
+          combineInferredAndUserSpecifiedPartitionSchema(inferredPartitionSpec)

-        // Without auto inference, all of value in the `row` should be null or in StringType,
         // we need to cast into the data type that user specified.
         def castPartitionValuesToUserSchema(row: InternalRow) = {
           InternalRow((0 until row.numFields).map { i =>
+            val dt = inferredPartitionSpec.partitionColumns.fields(i).dataType
             Cast(
-              Literal.create(row.getUTF8String(i), StringType),
-              userProvidedSchema.fields(i).dataType,
+              Literal.create(row.get(i, dt), dt),
+              userPartitionSchema.fields(i).dataType,
               Option(timeZoneId)).eval()
           }: _*)
         }

-        PartitionSpec(userProvidedSchema, spec.partitions.map { part =>
+        PartitionSpec(userPartitionSchema, inferredPartitionSpec.partitions.map { part =>
           part.copy(values = castPartitionValuesToUserSchema(part.values))
         })
       case _ =>
-        PartitioningUtils.parsePartitions(
-          leafDirs,
-          typeInference = sparkSession.sessionState.conf.partitionColumnTypeInferenceEnabled,
-          basePaths = basePaths,
-          timeZoneId = timeZoneId)
+        inferredPartitionSpec
     }
   }
@@ -236,6 +233,25 @@ abstract class PartitioningAwareFileIndex(
     val name = path.getName
     !((name.startsWith("_") && !name.contains("=")) || name.startsWith("."))
   }
+
+  /**
+   * In the read path, only tables managed by Hive provide the partition columns properly when
+   * initializing this class. All other file based data sources will try to infer the partitioning,
+   * and then cast the inferred types to user specified dataTypes if the partition columns exist
+   * inside `userSpecifiedSchema`, otherwise we can hit data corruption bugs like SPARK-18510, or
+   * inconsistent data types as reported in SPARK-21463.
+ * @param spec A partition inference result + * @return The PartitionSchema resolved from inference and cast according to `userSpecifiedSchema` + */ + private def combineInferredAndUserSpecifiedPartitionSchema(spec: PartitionSpec): StructType = { + val equality = sparkSession.sessionState.conf.resolver + val resolved = spec.partitionColumns.map { partitionField => + // SPARK-18510: try to get schema from userSpecifiedSchema, otherwise fallback to inferred + userSpecifiedSchema.flatMap(_.find(f => equality(f.name, partitionField.name))).getOrElse( + partitionField) + } + StructType(resolved) + } } object PartitioningAwareFileIndex { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/BroadcastHashJoinExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/BroadcastHashJoinExec.scala index fa62a32d51f3e..6fa716d9fadee 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/BroadcastHashJoinExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/BroadcastHashJoinExec.scala @@ -27,7 +27,7 @@ import org.apache.spark.sql.catalyst.plans._ import org.apache.spark.sql.catalyst.plans.physical.{BroadcastDistribution, Distribution, UnspecifiedDistribution} import org.apache.spark.sql.execution.{BinaryExecNode, CodegenSupport, SparkPlan} import org.apache.spark.sql.execution.metric.SQLMetrics -import org.apache.spark.sql.types.LongType +import org.apache.spark.sql.types.{BooleanType, LongType} import org.apache.spark.util.TaskCompletionListener /** @@ -192,8 +192,7 @@ case class BroadcastHashJoinExec( | $value = ${ev.value}; |} """.stripMargin - ExprCode(code, VariableValue(isNull, CodeGenerator.JAVA_BOOLEAN), - VariableValue(value, CodeGenerator.javaType(a.dataType))) + ExprCode(code, JavaCode.isNullVariable(isNull), JavaCode.variable(value, a.dataType)) } } } @@ -488,8 +487,8 @@ case class BroadcastHashJoinExec( s"$existsVar = true;" } - val resultVar = input ++ Seq(ExprCode("", FalseLiteral, - VariableValue(existsVar, CodeGenerator.JAVA_BOOLEAN))) + val resultVar = input ++ Seq(ExprCode.forNonNullValue( + JavaCode.variable(existsVar, BooleanType))) if (broadcastRelation.value.keyIsUnique) { s""" |// generate join key for stream side diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/SortMergeJoinExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/SortMergeJoinExec.scala index b61acb8d4fda9..d8261f0f33b61 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/SortMergeJoinExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/SortMergeJoinExec.scala @@ -22,11 +22,10 @@ import scala.collection.mutable.ArrayBuffer import org.apache.spark.rdd.RDD import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions._ -import org.apache.spark.sql.catalyst.expressions.codegen.{CodegenContext, CodeGenerator, ExprCode, FalseLiteral, VariableValue} +import org.apache.spark.sql.catalyst.expressions.codegen._ import org.apache.spark.sql.catalyst.plans._ import org.apache.spark.sql.catalyst.plans.physical._ -import org.apache.spark.sql.execution.{BinaryExecNode, CodegenSupport, -ExternalAppendOnlyUnsafeRowArray, RowIterator, SparkPlan} +import org.apache.spark.sql.execution._ import org.apache.spark.sql.execution.metric.{SQLMetric, SQLMetrics} import org.apache.spark.util.collection.BitSet @@ -531,13 +530,12 @@ case class SortMergeJoinExec( |boolean $isNull = false; |$javaType $value = $defaultValue; """.stripMargin - 
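The SPARK-18510 resolution step added above, restated standalone: keep each inferred partition column unless the user schema names the same column, in which case the user's field (and thus its type) wins. This sketch mirrors the hunk; only the function wrapper and parameter names are invented here:

import org.apache.spark.sql.types.{StructField, StructType}

def resolvePartitionSchema(
    inferred: StructType,
    userSpecified: Option[StructType],
    resolver: (String, String) => Boolean): StructType = {
  StructType(inferred.fields.map { partitionField =>
    // Prefer the user-declared field when names match under the session's
    // resolver (case sensitive or not); otherwise keep the inferred field.
    userSpecified
      .flatMap(_.find(f => resolver(f.name, partitionField.name)))
      .getOrElse(partitionField)
  })
}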
(ExprCode(code, VariableValue(isNull, CodeGenerator.JAVA_BOOLEAN),
-          VariableValue(value, CodeGenerator.javaType(a.dataType))), leftVarsDecl)
+        (ExprCode(code, JavaCode.isNullVariable(isNull), JavaCode.variable(value, a.dataType)),
+          leftVarsDecl)
       } else {
         val code = s"$value = $valueCode;"
         val leftVarsDecl = s"""$javaType $value = $defaultValue;"""
-        (ExprCode(code, FalseLiteral,
-          VariableValue(value, CodeGenerator.javaType(a.dataType))), leftVarsDecl)
+        (ExprCode(code, FalseLiteral, JavaCode.variable(value, a.dataType)), leftVarsDecl)
       }
     }.unzip
   }
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/CheckpointFileManager.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/CheckpointFileManager.scala
new file mode 100644
index 0000000000000..606ba250ad9d2
--- /dev/null
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/CheckpointFileManager.scala
@@ -0,0 +1,349 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.spark.sql.execution.streaming
+
+import java.io.{FileNotFoundException, IOException, OutputStream}
+import java.util.{EnumSet, UUID}
+
+import scala.util.control.NonFatal
+
+import org.apache.hadoop.conf.Configuration
+import org.apache.hadoop.fs._
+import org.apache.hadoop.fs.local.{LocalFs, RawLocalFs}
+import org.apache.hadoop.fs.permission.FsPermission
+
+import org.apache.spark.internal.Logging
+import org.apache.spark.sql.execution.streaming.CheckpointFileManager.RenameHelperMethods
+import org.apache.spark.sql.internal.SQLConf
+import org.apache.spark.util.Utils
+
+/**
+ * An interface to abstract out all operations related to streaming checkpoints. Most importantly,
+ * the key operation this interface provides is `createAtomic(path, overwrite)` which returns a
+ * `CancellableFSDataOutputStream`. This method is used by [[HDFSMetadataLog]] and
+ * [[org.apache.spark.sql.execution.streaming.state.StateStore StateStore]] implementations
+ * to write a complete checkpoint file atomically (i.e. no partial file will be visible), with or
+ * without overwrite.
+ *
+ * This higher-level interface above the Hadoop FileSystem is necessary because
+ * different implementations of FileSystem/FileContext may need different combinations of
+ * operations to provide the desired atomic guarantees (e.g. write-to-temp-file-and-rename,
+ * direct-write-and-cancel-on-failure) and this abstraction allows different implementations while
+ * keeping the usage simple (`createAtomic` -> `close` or `cancel`).
+ */
+trait CheckpointFileManager {
+
+  import org.apache.spark.sql.execution.streaming.CheckpointFileManager._
+
+  /**
+   * Create a file and make its contents available atomically after the output stream is closed.
+   *
+   * @param path                Path to create
+   * @param overwriteIfPossible If true, then the implementations must do a best-effort attempt to
+   *                            overwrite the file if it already exists. It should not throw
+   *                            any exception if the file exists. However, if false, then the
+   *                            implementation must not overwrite if the file already exists and
+   *                            must throw `FileAlreadyExistsException` in that case.
+   */
+  def createAtomic(path: Path, overwriteIfPossible: Boolean): CancellableFSDataOutputStream
+
+  /** Open a file for reading, or throw an exception if it does not exist. */
+  def open(path: Path): FSDataInputStream
+
+  /** List the files in a path that match a filter. */
+  def list(path: Path, filter: PathFilter): Array[FileStatus]
+
+  /** List all the files in a path. */
+  def list(path: Path): Array[FileStatus] = {
+    list(path, new PathFilter { override def accept(path: Path): Boolean = true })
+  }
+
+  /** Make a directory at the given path and all its parent directories as needed. */
+  def mkdirs(path: Path): Unit
+
+  /** Whether the given path exists. */
+  def exists(path: Path): Boolean
+
+  /** Recursively delete a path if it exists. Should not throw an exception if the file
+   *  doesn't exist. */
+  def delete(path: Path): Unit
+
+  /** Whether the default file system this implementation operates on is the local file system. */
+  def isLocal: Boolean
+}
+
+object CheckpointFileManager extends Logging {
+
+  /**
+   * Additional methods in CheckpointFileManager implementations that allow
+   * [[RenameBasedFSDataOutputStream]] to get atomicity by write-to-temp-file-and-rename
+   */
+  sealed trait RenameHelperMethods { self: CheckpointFileManager =>
+    /** Create a file with overwrite. */
+    def createTempFile(path: Path): FSDataOutputStream
+
+    /**
+     * Rename a file.
+     *
+     * @param srcPath             Source path to rename
+     * @param dstPath             Destination path to rename to
+     * @param overwriteIfPossible If true, then the implementations must do a best-effort attempt
+     *                            to overwrite the file if it already exists. It should not throw
+     *                            any exception if the file exists. However, if false, then the
+     *                            implementation must not overwrite if the file already exists and
+     *                            must throw `FileAlreadyExistsException` in that case.
+     */
+    def renameTempFile(srcPath: Path, dstPath: Path, overwriteIfPossible: Boolean): Unit
+  }
+
+  /**
+   * An interface to add the cancel() operation to [[FSDataOutputStream]]. This is used
+   * mainly by `CheckpointFileManager.createAtomic` to write a file atomically.
+   *
+   * @see [[CheckpointFileManager]].
+   */
+  abstract class CancellableFSDataOutputStream(protected val underlyingStream: OutputStream)
+    extends FSDataOutputStream(underlyingStream, null) {
+    /** Cancel the `underlyingStream` and ensure that the output file is not generated. */
+    def cancel(): Unit
+  }
+
+  /**
+   * An implementation of [[CancellableFSDataOutputStream]] that writes a file atomically by
+   * writing to a temporary file and then renaming it.
+   */
+  sealed class RenameBasedFSDataOutputStream(
+      fm: CheckpointFileManager with RenameHelperMethods,
+      finalPath: Path,
+      tempPath: Path,
+      overwriteIfPossible: Boolean)
+    extends CancellableFSDataOutputStream(fm.createTempFile(tempPath)) {
+
+    def this(fm: CheckpointFileManager with RenameHelperMethods, path: Path, overwrite: Boolean) = {
+      this(fm, path, generateTempPath(path), overwrite)
+    }
+
+    logInfo(s"Writing atomically to $finalPath using temp file $tempPath")
+    @volatile private var terminated = false
+
+    override def close(): Unit = synchronized {
+      try {
+        if (terminated) return
+        underlyingStream.close()
+        try {
+          fm.renameTempFile(tempPath, finalPath, overwriteIfPossible)
+        } catch {
+          case fe: FileAlreadyExistsException =>
+            logWarning(
+              s"Failed to rename temp file $tempPath to $finalPath because file exists", fe)
+            if (!overwriteIfPossible) throw fe
+        }
+        logInfo(s"Renamed temp file $tempPath to $finalPath")
+      } finally {
+        terminated = true
+      }
+    }
+
+    override def cancel(): Unit = synchronized {
+      try {
+        if (terminated) return
+        underlyingStream.close()
+        fm.delete(tempPath)
+      } catch {
+        case NonFatal(e) =>
+          logWarning(s"Error cancelling write to $finalPath", e)
+      } finally {
+        terminated = true
+      }
+    }
+  }
+
+
+  /** Create an instance of [[CheckpointFileManager]] based on the path and configuration. */
+  def create(path: Path, hadoopConf: Configuration): CheckpointFileManager = {
+    val fileManagerClass = hadoopConf.get(
+      SQLConf.STREAMING_CHECKPOINT_FILE_MANAGER_CLASS.parent.key)
+    if (fileManagerClass != null) {
+      return Utils.classForName(fileManagerClass)
+        .getConstructor(classOf[Path], classOf[Configuration])
+        .newInstance(path, hadoopConf)
+        .asInstanceOf[CheckpointFileManager]
+    }
+    try {
+      // Try to create a manager based on `FileContext` because HDFS's `FileContext.rename()`
+      // gives atomic renames, which is what we rely on for the default implementation
+      // `CheckpointFileManager.createAtomic`.
+      new FileContextBasedCheckpointFileManager(path, hadoopConf)
+    } catch {
+      case e: UnsupportedFileSystemException =>
+        logWarning(
+          "Could not use FileContext API for managing Structured Streaming checkpoint files at " +
+            s"$path. Using FileSystem API instead for managing log files. If the implementation " +
+            "of FileSystem.rename() is not atomic, then the correctness and fault-tolerance of " +
+            "your Structured Streaming is not guaranteed.")
+        new FileSystemBasedCheckpointFileManager(path, hadoopConf)
+    }
+  }
+
+  private def generateTempPath(path: Path): Path = {
+    val tc = org.apache.spark.TaskContext.get
+    val tid = if (tc != null) ".TID" + tc.taskAttemptId else ""
+    new Path(path.getParent, s".${path.getName}.${UUID.randomUUID}${tid}.tmp")
+  }
+}
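A sketch of the intended createAtomic usage contract, as described in the trait's Scaladoc above: close() commits the file atomically (for the rename-based streams, by renaming the temp file into place), while cancel() guarantees the final path never becomes visible. This assumes the CheckpointFileManager trait defined above is in scope; only the helper function itself is invented here:

import org.apache.hadoop.fs.Path

def writeCheckpointFile(fm: CheckpointFileManager, path: Path, bytes: Array[Byte]): Unit = {
  val out = fm.createAtomic(path, overwriteIfPossible = false)
  try {
    out.write(bytes)
    out.close() // commit: the contents become visible atomically
  } catch {
    case t: Throwable =>
      out.cancel() // abort: no partial file may appear at `path`
      throw t
  }
}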
+
+
+/** An implementation of [[CheckpointFileManager]] using Hadoop's [[FileSystem]] API. */
+class FileSystemBasedCheckpointFileManager(path: Path, hadoopConf: Configuration)
+  extends CheckpointFileManager with RenameHelperMethods with Logging {
+
+  import CheckpointFileManager._
+
+  protected val fs = path.getFileSystem(hadoopConf)
+
+  override def list(path: Path, filter: PathFilter): Array[FileStatus] = {
+    fs.listStatus(path, filter)
+  }
+
+  override def mkdirs(path: Path): Unit = {
+    fs.mkdirs(path, FsPermission.getDirDefault)
+  }
+
+  override def createTempFile(path: Path): FSDataOutputStream = {
+    fs.create(path, true)
+  }
+
+  override def createAtomic(
+      path: Path,
+      overwriteIfPossible: Boolean): CancellableFSDataOutputStream = {
+    new RenameBasedFSDataOutputStream(this, path, overwriteIfPossible)
+  }
+
+  override def open(path: Path): FSDataInputStream = {
+    fs.open(path)
+  }
+
+  override def exists(path: Path): Boolean = {
+    try {
+      fs.getFileStatus(path) != null
+    } catch {
+      case _: FileNotFoundException => false
+    }
+  }
+
+  override def renameTempFile(srcPath: Path, dstPath: Path, overwriteIfPossible: Boolean): Unit = {
+    if (!overwriteIfPossible && fs.exists(dstPath)) {
+      throw new FileAlreadyExistsException(
+        s"Failed to rename $srcPath to $dstPath as destination already exists")
+    }
+
+    if (!fs.rename(srcPath, dstPath)) {
+      // FileSystem.rename() returning false is very ambiguous as it can be for many reasons.
+      // This tries to make a best effort attempt to return the most appropriate exception.
+      if (fs.exists(dstPath)) {
+        if (!overwriteIfPossible) {
+          throw new FileAlreadyExistsException(s"Failed to rename as $dstPath already exists")
+        }
+      } else if (!fs.exists(srcPath)) {
+        throw new FileNotFoundException(s"Failed to rename as $srcPath was not found")
+      } else {
+        val msg = s"Failed to rename temp file $srcPath to $dstPath as rename returned false"
+        logWarning(msg)
+        throw new IOException(msg)
+      }
+    }
+  }
+
+  override def delete(path: Path): Unit = {
+    try {
+      fs.delete(path, true)
+    } catch {
+      case e: FileNotFoundException =>
+        logInfo(s"Failed to delete $path as it does not exist")
+        // ignore if file has already been deleted
+    }
+  }
+
+  override def isLocal: Boolean = fs match {
+    case _: LocalFileSystem | _: RawLocalFileSystem => true
+    case _ => false
+  }
+}
+
+
+/** An implementation of [[CheckpointFileManager]] using Hadoop's [[FileContext]] API.
*/ +class FileContextBasedCheckpointFileManager(path: Path, hadoopConf: Configuration) + extends CheckpointFileManager with RenameHelperMethods with Logging { + + import CheckpointFileManager._ + + private val fc = if (path.toUri.getScheme == null) { + FileContext.getFileContext(hadoopConf) + } else { + FileContext.getFileContext(path.toUri, hadoopConf) + } + + override def list(path: Path, filter: PathFilter): Array[FileStatus] = { + fc.util.listStatus(path, filter) + } + + override def mkdirs(path: Path): Unit = { + fc.mkdir(path, FsPermission.getDirDefault, true) + } + + override def createTempFile(path: Path): FSDataOutputStream = { + import CreateFlag._ + import Options._ + fc.create( + path, EnumSet.of(CREATE, OVERWRITE), CreateOpts.checksumParam(ChecksumOpt.createDisabled())) + } + + override def createAtomic( + path: Path, + overwriteIfPossible: Boolean): CancellableFSDataOutputStream = { + new RenameBasedFSDataOutputStream(this, path, overwriteIfPossible) + } + + override def open(path: Path): FSDataInputStream = { + fc.open(path) + } + + override def exists(path: Path): Boolean = { + fc.util.exists(path) + } + + override def renameTempFile(srcPath: Path, dstPath: Path, overwriteIfPossible: Boolean): Unit = { + import Options.Rename._ + fc.rename(srcPath, dstPath, if (overwriteIfPossible) OVERWRITE else NONE) + } + + + override def delete(path: Path): Unit = { + try { + fc.delete(path, true) + } catch { + case e: FileNotFoundException => + // ignore if file has already been deleted + } + } + + override def isLocal: Boolean = fc.getDefaultFileSystem match { + case _: LocalFs | _: RawLocalFs => true // LocalFs = RawLocalFs + ChecksumFs + case _ => false + } +} + diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/HDFSMetadataLog.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/HDFSMetadataLog.scala index 00bc215a5dc8c..bd0a46115ceb0 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/HDFSMetadataLog.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/HDFSMetadataLog.scala @@ -57,10 +57,10 @@ class HDFSMetadataLog[T <: AnyRef : ClassTag](sparkSession: SparkSession, path: require(implicitly[ClassTag[T]].runtimeClass != classOf[Seq[_]], "Should not create a log with type Seq, use Arrays instead - see SPARK-17372") - import HDFSMetadataLog._ - val metadataPath = new Path(path) - protected val fileManager = createFileManager() + + protected val fileManager = + CheckpointFileManager.create(metadataPath, sparkSession.sessionState.newHadoopConf) if (!fileManager.exists(metadataPath)) { fileManager.mkdirs(metadataPath) @@ -109,84 +109,31 @@ class HDFSMetadataLog[T <: AnyRef : ClassTag](sparkSession: SparkSession, path: require(metadata != null, "'null' metadata cannot written to a metadata log") get(batchId).map(_ => false).getOrElse { // Only write metadata when the batch has not yet been written - writeBatch(batchId, metadata) + writeBatchToFile(metadata, batchIdToPath(batchId)) true } } - private def writeTempBatch(metadata: T): Option[Path] = { - while (true) { - val tempPath = new Path(metadataPath, s".${UUID.randomUUID.toString}.tmp") - try { - val output = fileManager.create(tempPath) - try { - serialize(metadata, output) - return Some(tempPath) - } finally { - output.close() - } - } catch { - case e: FileAlreadyExistsException => - // Failed to create "tempPath". There are two cases: - // 1. Someone is creating "tempPath" too. - // 2. This is a restart. 
"tempPath" has already been created but not moved to the final - // batch file (not committed). - // - // For both cases, the batch has not yet been committed. So we can retry it. - // - // Note: there is a potential risk here: if HDFSMetadataLog A is running, people can use - // the same metadata path to create "HDFSMetadataLog" and fail A. However, this is not a - // big problem because it requires the attacker must have the permission to write the - // metadata path. In addition, the old Streaming also have this issue, people can create - // malicious checkpoint files to crash a Streaming application too. - } - } - None - } - - /** - * Write a batch to a temp file then rename it to the batch file. + /** Write a batch to a temp file then rename it to the batch file. * * There may be multiple [[HDFSMetadataLog]] using the same metadata path. Although it is not a * valid behavior, we still need to prevent it from destroying the files. */ - private def writeBatch(batchId: Long, metadata: T): Unit = { - val tempPath = writeTempBatch(metadata).getOrElse( - throw new IllegalStateException(s"Unable to create temp batch file $batchId")) + private def writeBatchToFile(metadata: T, path: Path): Unit = { + val output = fileManager.createAtomic(path, overwriteIfPossible = false) try { - // Try to commit the batch - // It will fail if there is an existing file (someone has committed the batch) - logDebug(s"Attempting to write log #${batchIdToPath(batchId)}") - fileManager.rename(tempPath, batchIdToPath(batchId)) - - // SPARK-17475: HDFSMetadataLog should not leak CRC files - // If the underlying filesystem didn't rename the CRC file, delete it. - val crcPath = new Path(tempPath.getParent(), s".${tempPath.getName()}.crc") - if (fileManager.exists(crcPath)) fileManager.delete(crcPath) + serialize(metadata, output) + output.close() } catch { case e: FileAlreadyExistsException => - // If "rename" fails, it means some other "HDFSMetadataLog" has committed the batch. - // So throw an exception to tell the user this is not a valid behavior. + output.cancel() + // If next batch file already exists, then another concurrently running query has + // written it. throw new ConcurrentModificationException( - s"Multiple HDFSMetadataLog are using $path", e) - } finally { - fileManager.delete(tempPath) - } - } - - /** - * @return the deserialized metadata in a batch file, or None if file not exist. - * @throws IllegalArgumentException when path does not point to a batch file. 
- */ - def get(batchFile: Path): Option[T] = { - if (fileManager.exists(batchFile)) { - if (isBatchFile(batchFile)) { - get(pathToBatchId(batchFile)) - } else { - throw new IllegalArgumentException(s"File ${batchFile} is not a batch file!") - } - } else { - None + s"Multiple streaming queries are concurrently using $path", e) + case e: Throwable => + output.cancel() + throw e } } @@ -219,7 +166,7 @@ class HDFSMetadataLog[T <: AnyRef : ClassTag](sparkSession: SparkSession, path: (endId.isEmpty || batchId <= endId.get) && (startId.isEmpty || batchId >= startId.get) }.sorted - verifyBatchIds(batchIds, startId, endId) + HDFSMetadataLog.verifyBatchIds(batchIds, startId, endId) batchIds.map(batchId => (batchId, get(batchId))).filter(_._2.isDefined).map { case (batchId, metadataOption) => @@ -280,19 +227,6 @@ class HDFSMetadataLog[T <: AnyRef : ClassTag](sparkSession: SparkSession, path: } } - private def createFileManager(): FileManager = { - val hadoopConf = sparkSession.sessionState.newHadoopConf() - try { - new FileContextManager(metadataPath, hadoopConf) - } catch { - case e: UnsupportedFileSystemException => - logWarning("Could not use FileContext API for managing metadata log files at path " + - s"$metadataPath. Using FileSystem API instead for managing log files. The log may be " + - s"inconsistent under failures.") - new FileSystemManager(metadataPath, hadoopConf) - } - } - /** * Parse the log version from the given `text` -- will throw exception when the parsed version * exceeds `maxSupportedVersion`, or when `text` is malformed (such as "xyz", "v", "v-1", @@ -327,135 +261,6 @@ class HDFSMetadataLog[T <: AnyRef : ClassTag](sparkSession: SparkSession, path: object HDFSMetadataLog { - /** A simple trait to abstract out the file management operations needed by HDFSMetadataLog. */ - trait FileManager { - - /** List the files in a path that match a filter. */ - def list(path: Path, filter: PathFilter): Array[FileStatus] - - /** Make directory at the give path and all its parent directories as needed. */ - def mkdirs(path: Path): Unit - - /** Whether path exists */ - def exists(path: Path): Boolean - - /** Open a file for reading, or throw exception if it does not exist. */ - def open(path: Path): FSDataInputStream - - /** Create path, or throw exception if it already exists */ - def create(path: Path): FSDataOutputStream - - /** - * Atomically rename path, or throw exception if it cannot be done. - * Should throw FileNotFoundException if srcPath does not exist. - * Should throw FileAlreadyExistsException if destPath already exists. - */ - def rename(srcPath: Path, destPath: Path): Unit - - /** Recursively delete a path if it exists. Should not throw exception if file doesn't exist. */ - def delete(path: Path): Unit - } - - /** - * Default implementation of FileManager using newer FileContext API. 
- */ - class FileContextManager(path: Path, hadoopConf: Configuration) extends FileManager { - private val fc = if (path.toUri.getScheme == null) { - FileContext.getFileContext(hadoopConf) - } else { - FileContext.getFileContext(path.toUri, hadoopConf) - } - - override def list(path: Path, filter: PathFilter): Array[FileStatus] = { - fc.util.listStatus(path, filter) - } - - override def rename(srcPath: Path, destPath: Path): Unit = { - fc.rename(srcPath, destPath) - } - - override def mkdirs(path: Path): Unit = { - fc.mkdir(path, FsPermission.getDirDefault, true) - } - - override def open(path: Path): FSDataInputStream = { - fc.open(path) - } - - override def create(path: Path): FSDataOutputStream = { - fc.create(path, EnumSet.of(CreateFlag.CREATE)) - } - - override def exists(path: Path): Boolean = { - fc.util().exists(path) - } - - override def delete(path: Path): Unit = { - try { - fc.delete(path, true) - } catch { - case e: FileNotFoundException => - // ignore if file has already been deleted - } - } - } - - /** - * Implementation of FileManager using older FileSystem API. Note that this implementation - * cannot provide atomic renaming of paths, hence can lead to consistency issues. This - * should be used only as a backup option, when FileContextManager cannot be used. - */ - class FileSystemManager(path: Path, hadoopConf: Configuration) extends FileManager { - private val fs = path.getFileSystem(hadoopConf) - - override def list(path: Path, filter: PathFilter): Array[FileStatus] = { - fs.listStatus(path, filter) - } - - /** - * Rename a path. Note that this implementation is not atomic. - * @throws FileNotFoundException if source path does not exist. - * @throws FileAlreadyExistsException if destination path already exists. - * @throws IOException if renaming fails for some unknown reason. - */ - override def rename(srcPath: Path, destPath: Path): Unit = { - if (!fs.exists(srcPath)) { - throw new FileNotFoundException(s"Source path does not exist: $srcPath") - } - if (fs.exists(destPath)) { - throw new FileAlreadyExistsException(s"Destination path already exists: $destPath") - } - if (!fs.rename(srcPath, destPath)) { - throw new IOException(s"Failed to rename $srcPath to $destPath") - } - } - - override def mkdirs(path: Path): Unit = { - fs.mkdirs(path, FsPermission.getDirDefault) - } - - override def open(path: Path): FSDataInputStream = { - fs.open(path) - } - - override def create(path: Path): FSDataOutputStream = { - fs.create(path, false) - } - - override def exists(path: Path): Boolean = { - fs.exists(path) - } - - override def delete(path: Path): Unit = { - try { - fs.delete(path, true) - } catch { - case e: FileNotFoundException => - // ignore if file has already been deleted - } - } - } - /** * Verify if batchIds are continuous and between `startId` and `endId`. * diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/MetadataLogFileIndex.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/MetadataLogFileIndex.scala index 1da703cefd8ea..5cacdd070b735 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/MetadataLogFileIndex.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/MetadataLogFileIndex.scala @@ -30,14 +30,14 @@ import org.apache.spark.sql.types.StructType * A [[FileIndex]] that generates the list of files to processing by reading them from the * metadata log files generated by the [[FileStreamSink]]. 
 *
- * @param userPartitionSchema an optional partition schema that will be use to provide types for
- *                            the discovered partitions
+ * @param userSpecifiedSchema an optional user-specified schema that will be used to provide
+ *                            types for the discovered partitions
 */
class MetadataLogFileIndex(
    sparkSession: SparkSession,
    path: Path,
-    userPartitionSchema: Option[StructType])
-  extends PartitioningAwareFileIndex(sparkSession, Map.empty, userPartitionSchema) {
+    userSpecifiedSchema: Option[StructType])
+  extends PartitioningAwareFileIndex(sparkSession, Map.empty, userSpecifiedSchema) {
 
   private val metadataDirectory = new Path(path, FileStreamSink.metadataDir)
   logInfo(s"Reading streaming file log from $metadataDirectory")
@@ -51,7 +51,7 @@ class MetadataLogFileIndex(
   }
 
   override protected val leafDirToChildrenFiles: Map[Path, Array[FileStatus]] = {
-    allFilesFromLog.toArray.groupBy(_.getPath.getParent)
+    allFilesFromLog.groupBy(_.getPath.getParent)
   }
 
   override def rootPaths: Seq[Path] = path :: Nil
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousExecution.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousExecution.scala
index 1758b3844bd62..951d694355ec5 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousExecution.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousExecution.scala
@@ -31,6 +31,7 @@ import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
 import org.apache.spark.sql.execution.SQLExecution
 import org.apache.spark.sql.execution.datasources.v2.{StreamingDataSourceV2Relation, WriteToDataSourceV2}
 import org.apache.spark.sql.execution.streaming.{ContinuousExecutionRelation, StreamingRelationV2, _}
+import org.apache.spark.sql.sources.v2
 import org.apache.spark.sql.sources.v2.{ContinuousReadSupport, DataSourceOptions, StreamWriteSupport}
 import org.apache.spark.sql.sources.v2.reader.streaming.{ContinuousReader, PartitionOffset}
 import org.apache.spark.sql.streaming.{OutputMode, ProcessingTime, Trigger}
@@ -317,8 +318,10 @@ class ContinuousExecution(
     synchronized {
       if (queryExecutionThread.isAlive) {
         commitLog.add(epoch)
-        val offset = offsetLog.get(epoch).get.offsets(0).get
+        val offset =
+          continuousSources(0).deserializeOffset(offsetLog.get(epoch).get.offsets(0).get.json)
         committedOffsets ++= Seq(continuousSources(0) -> offset)
+        continuousSources(0).commit(offset.asInstanceOf[v2.reader.streaming.Offset])
       } else {
         return
       }
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/memory.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/memory.scala
index 352d4ce9fbcaa..628923d367ce7 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/memory.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/memory.scala
@@ -24,17 +24,19 @@ import javax.annotation.concurrent.GuardedBy
 import scala.collection.JavaConverters._
 import scala.collection.mutable.{ArrayBuffer, ListBuffer}
+import scala.reflect.ClassTag
 import scala.util.control.NonFatal
 
 import org.apache.spark.internal.Logging
 import org.apache.spark.sql._
-import org.apache.spark.sql.catalyst.encoders.encoderFor
+import org.apache.spark.sql.catalyst.encoders.{encoderFor, ExpressionEncoder}
 import org.apache.spark.sql.catalyst.expressions.{Attribute, UnsafeRow}
-import org.apache.spark.sql.catalyst.plans.logical.{LeafNode, Statistics}
+import
org.apache.spark.sql.catalyst.plans.logical.{LeafNode, LogicalPlan, Statistics} import org.apache.spark.sql.catalyst.streaming.InternalOutputModes._ +import org.apache.spark.sql.execution.streaming.continuous.ContinuousTrigger import org.apache.spark.sql.sources.v2.reader.{DataReader, DataReaderFactory, SupportsScanUnsafeRow} import org.apache.spark.sql.sources.v2.reader.streaming.{MicroBatchReader, Offset => OffsetV2} -import org.apache.spark.sql.streaming.OutputMode +import org.apache.spark.sql.streaming.{OutputMode, Trigger} import org.apache.spark.sql.types.StructType import org.apache.spark.util.Utils @@ -47,16 +49,43 @@ object MemoryStream { new MemoryStream[A](memoryStreamId.getAndIncrement(), sqlContext) } +/** + * A base class for memory stream implementations. Supports adding data and resetting. + */ +abstract class MemoryStreamBase[A : Encoder](sqlContext: SQLContext) extends BaseStreamingSource { + protected val encoder = encoderFor[A] + protected val attributes = encoder.schema.toAttributes + + def toDS(): Dataset[A] = { + Dataset[A](sqlContext.sparkSession, logicalPlan) + } + + def toDF(): DataFrame = { + Dataset.ofRows(sqlContext.sparkSession, logicalPlan) + } + + def addData(data: A*): Offset = { + addData(data.toTraversable) + } + + def readSchema(): StructType = encoder.schema + + protected def logicalPlan: LogicalPlan + + def addData(data: TraversableOnce[A]): Offset +} + /** * A [[Source]] that produces value stored in memory as they are added by the user. This [[Source]] * is intended for use in unit tests as it can only replay data when the object is still * available. */ case class MemoryStream[A : Encoder](id: Int, sqlContext: SQLContext) - extends MicroBatchReader with SupportsScanUnsafeRow with Logging { - protected val encoder = encoderFor[A] - private val attributes = encoder.schema.toAttributes - protected val logicalPlan = StreamingExecutionRelation(this, attributes)(sqlContext.sparkSession) + extends MemoryStreamBase[A](sqlContext) + with MicroBatchReader with SupportsScanUnsafeRow with Logging { + + protected val logicalPlan: LogicalPlan = + StreamingExecutionRelation(this, attributes)(sqlContext.sparkSession) protected val output = logicalPlan.output /** @@ -70,7 +99,7 @@ case class MemoryStream[A : Encoder](id: Int, sqlContext: SQLContext) protected var currentOffset: LongOffset = new LongOffset(-1) @GuardedBy("this") - private var startOffset = new LongOffset(-1) + protected var startOffset = new LongOffset(-1) @GuardedBy("this") private var endOffset = new LongOffset(-1) @@ -82,18 +111,6 @@ case class MemoryStream[A : Encoder](id: Int, sqlContext: SQLContext) @GuardedBy("this") protected var lastOffsetCommitted : LongOffset = new LongOffset(-1) - def toDS(): Dataset[A] = { - Dataset(sqlContext.sparkSession, logicalPlan) - } - - def toDF(): DataFrame = { - Dataset.ofRows(sqlContext.sparkSession, logicalPlan) - } - - def addData(data: A*): Offset = { - addData(data.toTraversable) - } - def addData(data: TraversableOnce[A]): Offset = { val objects = data.toSeq val rows = objects.iterator.map(d => encoder.toRow(d).copy().asInstanceOf[UnsafeRow]).toArray @@ -114,8 +131,6 @@ case class MemoryStream[A : Encoder](id: Int, sqlContext: SQLContext) } } - override def readSchema(): StructType = encoder.schema - override def deserializeOffset(json: String): OffsetV2 = LongOffset(json.toLong) override def getStartOffset: OffsetV2 = synchronized { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/sources/ContinuousMemoryStream.scala 
b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/sources/ContinuousMemoryStream.scala new file mode 100644 index 0000000000000..c28919b8b729b --- /dev/null +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/sources/ContinuousMemoryStream.scala @@ -0,0 +1,211 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.execution.streaming.sources + +import java.{util => ju} +import java.util.Optional +import java.util.concurrent.atomic.AtomicInteger +import javax.annotation.concurrent.GuardedBy + +import scala.collection.JavaConverters._ +import scala.collection.mutable.ListBuffer + +import org.json4s.NoTypeHints +import org.json4s.jackson.Serialization + +import org.apache.spark.SparkEnv +import org.apache.spark.rpc.{RpcCallContext, RpcEndpointRef, RpcEnv, ThreadSafeRpcEndpoint} +import org.apache.spark.sql.{Encoder, Row, SQLContext} +import org.apache.spark.sql.execution.streaming._ +import org.apache.spark.sql.execution.streaming.sources.ContinuousMemoryStream.GetRecord +import org.apache.spark.sql.sources.v2.{ContinuousReadSupport, DataSourceOptions} +import org.apache.spark.sql.sources.v2.reader.DataReaderFactory +import org.apache.spark.sql.sources.v2.reader.streaming.{ContinuousDataReader, ContinuousReader, Offset, PartitionOffset} +import org.apache.spark.sql.types.StructType +import org.apache.spark.util.RpcUtils + +/** + * The overall strategy here is: + * * ContinuousMemoryStream maintains a list of records for each partition. addData() will + * distribute records evenly-ish across partitions. + * * RecordEndpoint is set up as an endpoint for executor-side + * ContinuousMemoryStreamDataReader instances to poll. It returns the record at the specified + * offset within the list, or null if that offset doesn't yet have a record. + */ +class ContinuousMemoryStream[A : Encoder](id: Int, sqlContext: SQLContext) + extends MemoryStreamBase[A](sqlContext) with ContinuousReader with ContinuousReadSupport { + private implicit val formats = Serialization.formats(NoTypeHints) + private val NUM_PARTITIONS = 2 + + protected val logicalPlan = + StreamingRelationV2(this, "memory", Map(), attributes, None)(sqlContext.sparkSession) + + // ContinuousReader implementation + + @GuardedBy("this") + private val records = Seq.fill(NUM_PARTITIONS)(new ListBuffer[A]) + + @GuardedBy("this") + private var startOffset: ContinuousMemoryStreamOffset = _ + + private val recordEndpoint = new RecordEndpoint() + @volatile private var endpointRef: RpcEndpointRef = _ + + def addData(data: TraversableOnce[A]): Offset = synchronized { + // Distribute data evenly among partition lists. 
+ data.toSeq.zipWithIndex.map { + case (item, index) => records(index % NUM_PARTITIONS) += item + } + + // The new target offset is the offset where all records in all partitions have been processed. + ContinuousMemoryStreamOffset((0 until NUM_PARTITIONS).map(i => (i, records(i).size)).toMap) + } + + override def setStartOffset(start: Optional[Offset]): Unit = synchronized { + // Inferred initial offset is position 0 in each partition. + startOffset = start.orElse { + ContinuousMemoryStreamOffset((0 until NUM_PARTITIONS).map(i => (i, 0)).toMap) + }.asInstanceOf[ContinuousMemoryStreamOffset] + } + + override def getStartOffset: Offset = synchronized { + startOffset + } + + override def deserializeOffset(json: String): ContinuousMemoryStreamOffset = { + ContinuousMemoryStreamOffset(Serialization.read[Map[Int, Int]](json)) + } + + override def mergeOffsets(offsets: Array[PartitionOffset]): ContinuousMemoryStreamOffset = { + ContinuousMemoryStreamOffset( + offsets.map { + case ContinuousMemoryStreamPartitionOffset(part, num) => (part, num) + }.toMap + ) + } + + override def createDataReaderFactories(): ju.List[DataReaderFactory[Row]] = { + synchronized { + val endpointName = s"ContinuousMemoryStreamRecordEndpoint-${java.util.UUID.randomUUID()}-$id" + endpointRef = + recordEndpoint.rpcEnv.setupEndpoint(endpointName, recordEndpoint) + + startOffset.partitionNums.map { + case (part, index) => + new ContinuousMemoryStreamDataReaderFactory( + endpointName, part, index): DataReaderFactory[Row] + }.toList.asJava + } + } + + override def stop(): Unit = { + if (endpointRef != null) recordEndpoint.rpcEnv.stop(endpointRef) + } + + override def commit(end: Offset): Unit = {} + + // ContinuousReadSupport implementation + // This is necessary because of how StreamTest finds the source for AddDataMemory steps. + def createContinuousReader( + schema: Optional[StructType], + checkpointLocation: String, + options: DataSourceOptions): ContinuousReader = { + this + } + + /** + * Endpoint for executors to poll for records. + */ + private class RecordEndpoint extends ThreadSafeRpcEndpoint { + override val rpcEnv: RpcEnv = SparkEnv.get.rpcEnv + + override def receiveAndReply(context: RpcCallContext): PartialFunction[Any, Unit] = { + case GetRecord(ContinuousMemoryStreamPartitionOffset(part, index)) => + ContinuousMemoryStream.this.synchronized { + val buf = records(part) + val record = if (buf.size <= index) None else Some(buf(index)) + + context.reply(record.map(Row(_))) + } + } + } +} + +object ContinuousMemoryStream { + case class GetRecord(offset: ContinuousMemoryStreamPartitionOffset) + protected val memoryStreamId = new AtomicInteger(0) + + def apply[A : Encoder](implicit sqlContext: SQLContext): ContinuousMemoryStream[A] = + new ContinuousMemoryStream[A](memoryStreamId.getAndIncrement(), sqlContext) +} + +/** + * Data reader factory for continuous memory stream. + */ +class ContinuousMemoryStreamDataReaderFactory( + driverEndpointName: String, + partition: Int, + startOffset: Int) extends DataReaderFactory[Row] { + override def createDataReader: ContinuousMemoryStreamDataReader = + new ContinuousMemoryStreamDataReader(driverEndpointName, partition, startOffset) +} + +/** + * Data reader for continuous memory stream. + * + * Polls the driver endpoint for new records. 
+ */ +class ContinuousMemoryStreamDataReader( + driverEndpointName: String, + partition: Int, + startOffset: Int) extends ContinuousDataReader[Row] { + private val endpoint = RpcUtils.makeDriverRef( + driverEndpointName, + SparkEnv.get.conf, + SparkEnv.get.rpcEnv) + + private var currentOffset = startOffset + private var current: Option[Row] = None + + override def next(): Boolean = { + current = None + while (current.isEmpty) { + Thread.sleep(10) + current = endpoint.askSync[Option[Row]]( + GetRecord(ContinuousMemoryStreamPartitionOffset(partition, currentOffset))) + } + currentOffset += 1 + true + } + + override def get(): Row = current.get + + override def close(): Unit = {} + + override def getOffset: ContinuousMemoryStreamPartitionOffset = + ContinuousMemoryStreamPartitionOffset(partition, currentOffset) +} + +case class ContinuousMemoryStreamOffset(partitionNums: Map[Int, Int]) + extends Offset { + private implicit val formats = Serialization.formats(NoTypeHints) + override def json(): String = Serialization.write(partitionNums) +} + +case class ContinuousMemoryStreamPartitionOffset(partition: Int, numProcessed: Int) + extends PartitionOffset diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/HDFSBackedStateStoreProvider.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/HDFSBackedStateStoreProvider.scala index 3f5002a4e6937..df722b953228b 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/HDFSBackedStateStoreProvider.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/HDFSBackedStateStoreProvider.scala @@ -17,7 +17,7 @@ package org.apache.spark.sql.execution.streaming.state -import java.io.{DataInputStream, DataOutputStream, FileNotFoundException, IOException} +import java.io._ import java.nio.channels.ClosedChannelException import java.util.Locale @@ -27,13 +27,16 @@ import scala.util.Random import scala.util.control.NonFatal import com.google.common.io.ByteStreams +import org.apache.commons.io.IOUtils import org.apache.hadoop.conf.Configuration -import org.apache.hadoop.fs.{FileStatus, Path} +import org.apache.hadoop.fs._ import org.apache.spark.{SparkConf, SparkEnv} import org.apache.spark.internal.Logging import org.apache.spark.io.LZ4CompressionCodec import org.apache.spark.sql.catalyst.expressions.UnsafeRow +import org.apache.spark.sql.execution.streaming.CheckpointFileManager +import org.apache.spark.sql.execution.streaming.CheckpointFileManager.CancellableFSDataOutputStream import org.apache.spark.sql.types.StructType import org.apache.spark.util.{SizeEstimator, Utils} @@ -87,10 +90,10 @@ private[state] class HDFSBackedStateStoreProvider extends StateStoreProvider wit case object ABORTED extends STATE private val newVersion = version + 1 - private val tempDeltaFile = new Path(baseDir, s"temp-${Random.nextLong}") - private lazy val tempDeltaFileStream = compressStream(fs.create(tempDeltaFile, true)) @volatile private var state: STATE = UPDATING - @volatile private var finalDeltaFile: Path = null + private val finalDeltaFile: Path = deltaFile(newVersion) + private lazy val deltaFileStream = fm.createAtomic(finalDeltaFile, overwriteIfPossible = true) + private lazy val compressedStream = compressStream(deltaFileStream) override def id: StateStoreId = HDFSBackedStateStoreProvider.this.stateStoreId @@ -103,14 +106,14 @@ private[state] class HDFSBackedStateStoreProvider extends StateStoreProvider wit val keyCopy = key.copy() val valueCopy = value.copy() 
mapToUpdate.put(keyCopy, valueCopy) - writeUpdateToDeltaFile(tempDeltaFileStream, keyCopy, valueCopy) + writeUpdateToDeltaFile(compressedStream, keyCopy, valueCopy) } override def remove(key: UnsafeRow): Unit = { verify(state == UPDATING, "Cannot remove after already committed or aborted") val prevValue = mapToUpdate.remove(key) if (prevValue != null) { - writeRemoveToDeltaFile(tempDeltaFileStream, key) + writeRemoveToDeltaFile(compressedStream, key) } } @@ -126,8 +129,7 @@ private[state] class HDFSBackedStateStoreProvider extends StateStoreProvider wit verify(state == UPDATING, "Cannot commit after already committed or aborted") try { - finalizeDeltaFile(tempDeltaFileStream) - finalDeltaFile = commitUpdates(newVersion, mapToUpdate, tempDeltaFile) + commitUpdates(newVersion, mapToUpdate, compressedStream) state = COMMITTED logInfo(s"Committed version $newVersion for $this to file $finalDeltaFile") newVersion @@ -140,23 +142,14 @@ private[state] class HDFSBackedStateStoreProvider extends StateStoreProvider wit /** Abort all the updates made on this store. This store will not be usable any more. */ override def abort(): Unit = { - verify(state == UPDATING || state == ABORTED, "Cannot abort after already committed") - try { + // This if statement is to ensure that files are deleted only if there are changes to the + // StateStore. We have two StateStores for each task, one which is used only for reading, and + // the other used for read+write. We don't want the read-only to delete state files. + if (state == UPDATING) { + state = ABORTED + cancelDeltaFile(compressedStream, deltaFileStream) + } else { state = ABORTED - if (tempDeltaFileStream != null) { - tempDeltaFileStream.close() - } - if (tempDeltaFile != null) { - fs.delete(tempDeltaFile, true) - } - } catch { - case c: ClosedChannelException => - // This can happen when underlying file output stream has been closed before the - // compression stream. - logDebug(s"Error aborting version $newVersion into $this", c) - - case e: Exception => - logWarning(s"Error aborting version $newVersion into $this", e) } logInfo(s"Aborted version $newVersion for $this") } @@ -212,7 +205,7 @@ private[state] class HDFSBackedStateStoreProvider extends StateStoreProvider wit this.valueSchema = valueSchema this.storeConf = storeConf this.hadoopConf = hadoopConf - fs.mkdirs(baseDir) + fm.mkdirs(baseDir) } override def stateStoreId: StateStoreId = stateStoreId_ @@ -251,31 +244,15 @@ private[state] class HDFSBackedStateStoreProvider extends StateStoreProvider wit private lazy val loadedMaps = new mutable.HashMap[Long, MapType] private lazy val baseDir = stateStoreId.storeCheckpointLocation() - private lazy val fs = baseDir.getFileSystem(hadoopConf) + private lazy val fm = CheckpointFileManager.create(baseDir, hadoopConf) private lazy val sparkConf = Option(SparkEnv.get).map(_.conf).getOrElse(new SparkConf) private case class StoreFile(version: Long, path: Path, isSnapshot: Boolean) - /** Commit a set of updates to the store with the given new version */ - private def commitUpdates(newVersion: Long, map: MapType, tempDeltaFile: Path): Path = { + private def commitUpdates(newVersion: Long, map: MapType, output: DataOutputStream): Unit = { synchronized { - val finalDeltaFile = deltaFile(newVersion) - - // scalastyle:off - // Renaming a file atop an existing one fails on HDFS - // (http://hadoop.apache.org/docs/stable/hadoop-project-dist/hadoop-common/filesystem/filesystem.html). - // Hence we should either skip the rename step or delete the target file. 
Because deleting the
-      // target file will break speculation, skipping the rename step is the only choice. It's still
-      // semantically correct because Structured Streaming requires rerunning a batch should
-      // generate the same output. (SPARK-19677)
-      // scalastyle:on
-      if (fs.exists(finalDeltaFile)) {
-        fs.delete(tempDeltaFile, true)
-      } else if (!fs.rename(tempDeltaFile, finalDeltaFile)) {
-        throw new IOException(s"Failed to rename $tempDeltaFile to $finalDeltaFile")
-      }
+      finalizeDeltaFile(output)
       loadedMaps.put(newVersion, map)
-      finalDeltaFile
     }
   }
 
@@ -365,7 +342,7 @@ private[state] class HDFSBackedStateStoreProvider extends StateStoreProvider wit
     val fileToRead = deltaFile(version)
     var input: DataInputStream = null
     val sourceStream = try {
-      fs.open(fileToRead)
+      fm.open(fileToRead)
     } catch {
       case f: FileNotFoundException =>
         throw new IllegalStateException(
@@ -412,12 +389,12 @@ private[state] class HDFSBackedStateStoreProvider extends StateStoreProvider wit
   }
 
   private def writeSnapshotFile(version: Long, map: MapType): Unit = {
-    val fileToWrite = snapshotFile(version)
-    val tempFile =
-      new Path(fileToWrite.getParent, s"${fileToWrite.getName}.temp-${Random.nextLong}")
+    val targetFile = snapshotFile(version)
+    var rawOutput: CancellableFSDataOutputStream = null
     var output: DataOutputStream = null
-    Utils.tryWithSafeFinally {
-      output = compressStream(fs.create(tempFile, false))
+    try {
+      rawOutput = fm.createAtomic(targetFile, overwriteIfPossible = true)
+      output = compressStream(rawOutput)
       val iter = map.entrySet().iterator()
       while(iter.hasNext) {
         val entry = iter.next()
@@ -429,16 +406,34 @@
         output.write(valueBytes)
       }
       output.writeInt(-1)
-    } {
-      if (output != null) output.close()
+      output.close()
+    } catch {
+      case e: Throwable =>
+        cancelDeltaFile(compressedStream = output, rawStream = rawOutput)
+        throw e
     }
-    if (fs.exists(fileToWrite)) {
-      // Skip rename if the file is alreayd created.
-      fs.delete(tempFile, true)
-    } else if (!fs.rename(tempFile, fileToWrite)) {
-      throw new IOException(s"Failed to rename $tempFile to $fileToWrite")
+    logInfo(s"Written snapshot file for version $version of $this at $targetFile")
+  }
+
+  /**
+   * Try to cancel the underlying stream and safely close the compressed stream.
+   *
+   * @param compressedStream the compressed stream.
+   * @param rawStream the underlying stream which needs to be cancelled.
+   */
+  private def cancelDeltaFile(
+      compressedStream: DataOutputStream,
+      rawStream: CancellableFSDataOutputStream): Unit = {
+    try {
+      if (rawStream != null) rawStream.cancel()
+      IOUtils.closeQuietly(compressedStream)
+    } catch {
+      case e: FSError if e.getCause.isInstanceOf[IOException] =>
+        // Closing the compressedStream causes the stream to write/flush data into the
+        // rawStream. Since the rawStream is already closed, there may be errors.
+        // Usually it's an IOException. However, Hadoop's RawLocalFileSystem wraps
+        // IOException into FSError.
} - logInfo(s"Written snapshot file for version $version of $this at $fileToWrite") } private def readSnapshotFile(version: Long): Option[MapType] = { @@ -447,7 +442,7 @@ private[state] class HDFSBackedStateStoreProvider extends StateStoreProvider wit var input: DataInputStream = null try { - input = decompressStream(fs.open(fileToRead)) + input = decompressStream(fm.open(fileToRead)) var eof = false while (!eof) { @@ -508,7 +503,6 @@ private[state] class HDFSBackedStateStoreProvider extends StateStoreProvider wit case None => // The last map is not loaded, probably some other instance is in charge } - } } catch { case NonFatal(e) => @@ -534,7 +528,7 @@ private[state] class HDFSBackedStateStoreProvider extends StateStoreProvider wit } val filesToDelete = files.filter(_.version < earliestFileToRetain.version) filesToDelete.foreach { f => - fs.delete(f.path, true) + fm.delete(f.path) } logInfo(s"Deleted files older than ${earliestFileToRetain.version} for $this: " + filesToDelete.mkString(", ")) @@ -576,7 +570,7 @@ private[state] class HDFSBackedStateStoreProvider extends StateStoreProvider wit /** Fetch all the files that back the store */ private def fetchFiles(): Seq[StoreFile] = { val files: Seq[FileStatus] = try { - fs.listStatus(baseDir) + fm.list(baseDir) } catch { case _: java.io.FileNotFoundException => Seq.empty diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/StateStore.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/StateStore.scala index d1d9f95cb0977..7eb68c21569ba 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/StateStore.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/StateStore.scala @@ -459,7 +459,6 @@ object StateStore extends Logging { private def coordinatorRef: Option[StateStoreCoordinatorRef] = loadedProviders.synchronized { val env = SparkEnv.get if (env != null) { - logInfo("Env is not null") val isDriver = env.executorId == SparkContext.DRIVER_IDENTIFIER || env.executorId == SparkContext.LEGACY_DRIVER_IDENTIFIER @@ -467,13 +466,12 @@ object StateStore extends Logging { // as SparkContext + SparkEnv may have been restarted. Hence, when running in driver, // always recreate the reference. if (isDriver || _coordRef == null) { - logInfo("Getting StateStoreCoordinatorRef") + logDebug("Getting StateStoreCoordinatorRef") _coordRef = StateStoreCoordinatorRef.forExecutor(env) } logInfo(s"Retrieved reference to StateStoreCoordinator: ${_coordRef}") Some(_coordRef) } else { - logInfo("Env is null") _coordRef = null None } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala index 3c580ba6f6ceb..f611a61ad709e 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala @@ -3310,6 +3310,22 @@ object functions { */ def sort_array(e: Column, asc: Boolean): Column = withExpr { SortArray(e.expr, lit(asc).expr) } + /** + * Returns the minimum value in the array. + * + * @group collection_funcs + * @since 2.4.0 + */ + def array_min(e: Column): Column = withExpr { ArrayMin(e.expr) } + + /** + * Returns the maximum value in the array. + * + * @group collection_funcs + * @since 2.4.0 + */ + def array_max(e: Column): Column = withExpr { ArrayMax(e.expr) } + /** * Returns an unordered array containing the keys of the map. 
   * @group collection_funcs
diff --git a/sql/core/src/test/resources/sql-tests/inputs/datetime.sql b/sql/core/src/test/resources/sql-tests/inputs/datetime.sql
index adea2bfa82cd3..547c2bef02b24 100644
--- a/sql/core/src/test/resources/sql-tests/inputs/datetime.sql
+++ b/sql/core/src/test/resources/sql-tests/inputs/datetime.sql
@@ -25,3 +25,5 @@ create temporary view ttf2 as select * from values
 select current_date = current_date(), current_timestamp = current_timestamp(), a, b from ttf2;
 
 select a, b from ttf2 order by a, current_date;
+
+select weekday('2007-02-03'), weekday('2009-07-30'), weekday('2017-05-27'), weekday(null), weekday('1582-10-15 13:10:15');
diff --git a/sql/core/src/test/resources/sql-tests/results/datetime.sql.out b/sql/core/src/test/resources/sql-tests/results/datetime.sql.out
index bbb6851e69c7e..4e1cfa6e48c1c 100644
--- a/sql/core/src/test/resources/sql-tests/results/datetime.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/datetime.sql.out
@@ -1,5 +1,5 @@
 -- Automatically generated by SQLQueryTestSuite
--- Number of queries: 9
+-- Number of queries: 10
 
 
 -- !query 0
@@ -81,3 +81,10 @@ struct
 -- !query 8 output
 1	2
 2	3
+
+-- !query 9
+select weekday('2007-02-03'), weekday('2009-07-30'), weekday('2017-05-27'), weekday(null), weekday('1582-10-15 13:10:15')
+-- !query 9 schema
+struct
+-- !query 9 output
+5	3	5	NULL	4
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/ConfigBehaviorSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/ConfigBehaviorSuite.scala
index cee85ec8af04d..949505e449fd7 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/ConfigBehaviorSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/ConfigBehaviorSuite.scala
@@ -39,7 +39,7 @@ class ConfigBehaviorSuite extends QueryTest with SharedSQLContext {
     def computeChiSquareTest(): Double = {
       val n = 10000
       // Trigger a sort
-      val data = spark.range(0, n, 1, 1).sort('id)
+      val data = spark.range(0, n, 1, 1).sort('id.desc)
         .selectExpr("SPARK_PARTITION_ID() pid", "id").as[(Int, Long)].collect()
 
       // Compute histogram for the number of records per partition post sort
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala
index 4fcf8681e4bd0..fb28c9776bf50 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala
@@ -430,6 +430,34 @@ class DataFrameFunctionsSuite extends QueryTest with SharedSQLContext {
     }
   }
 
+  test("array_min function") {
+    val df = Seq(
+      Seq[Option[Int]](Some(1), Some(3), Some(2)),
+      Seq.empty[Option[Int]],
+      Seq[Option[Int]](None),
+      Seq[Option[Int]](None, Some(1), Some(-100))
+    ).toDF("a")
+
+    val answer = Seq(Row(1), Row(null), Row(null), Row(-100))
+
+    checkAnswer(df.select(array_min(df("a"))), answer)
+    checkAnswer(df.selectExpr("array_min(a)"), answer)
+  }
+
+  test("array_max function") {
+    val df = Seq(
+      Seq[Option[Int]](Some(1), Some(3), Some(2)),
+      Seq.empty[Option[Int]],
+      Seq[Option[Int]](None),
+      Seq[Option[Int]](None, Some(1), Some(-100))
+    ).toDF("a")
+
+    val answer = Seq(Row(3), Row(null), Row(null), Row(1))
+
+    checkAnswer(df.select(array_max(df("a"))), answer)
+    checkAnswer(df.selectExpr("array_max(a)"), answer)
+  }
+
   private def assertValuesDoNotChangeAfterCoalesceOrUnion(v: Column): Unit = {
     import DataFrameFunctionsSuite.CodegenFallbackExpr
     for ((codegenFallback, wholeStage) <- Seq((true, false), (false, false), (false,
true))) { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SessionStateSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SessionStateSuite.scala index 4efae4c46c2e1..7d1366092d1e6 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SessionStateSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SessionStateSuite.scala @@ -44,6 +44,8 @@ class SessionStateSuite extends SparkFunSuite { if (activeSession != null) { activeSession.stop() activeSession = null + SparkSession.clearActiveSession() + SparkSession.clearDefaultSession() } super.afterAll() } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/TestQueryExecutionListener.scala b/sql/core/src/test/scala/org/apache/spark/sql/TestQueryExecutionListener.scala new file mode 100644 index 0000000000000..d2a6358ee822b --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/TestQueryExecutionListener.scala @@ -0,0 +1,44 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql + +import java.util.concurrent.atomic.AtomicBoolean + +import org.apache.spark.sql.execution.QueryExecution +import org.apache.spark.sql.util.QueryExecutionListener + + +class TestQueryExecutionListener extends QueryExecutionListener { + override def onSuccess(funcName: String, qe: QueryExecution, durationNs: Long): Unit = { + OnSuccessCall.isOnSuccessCalled.set(true) + } + + override def onFailure(funcName: String, qe: QueryExecution, exception: Exception): Unit = { } +} + +/** + * This has a variable to check if `onSuccess` is actually called or not. Currently, this is for + * the test case in PySpark. See SPARK-23942. 
+ */ +object OnSuccessCall { + val isOnSuccessCalled = new AtomicBoolean(false) + + def isCalled(): Boolean = isOnSuccessCalled.get() + + def clear(): Unit = isOnSuccessCalled.set(false) +} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/PlannerSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/PlannerSuite.scala index f8b26f5b28cc7..40915a102bab0 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/PlannerSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/PlannerSuite.scala @@ -22,7 +22,7 @@ import org.apache.spark.sql.{execution, Row} import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.plans.{Cross, FullOuter, Inner, LeftOuter, RightOuter} -import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, Repartition} +import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, Repartition, Sort} import org.apache.spark.sql.catalyst.plans.physical._ import org.apache.spark.sql.execution.columnar.InMemoryRelation import org.apache.spark.sql.execution.exchange.{EnsureRequirements, ReusedExchangeExec, ReuseExchange, ShuffleExchangeExec} @@ -197,6 +197,19 @@ class PlannerSuite extends SharedSQLContext { assert(planned.child.isInstanceOf[CollectLimitExec]) } + test("SPARK-23375: Cached sorted data doesn't need to be re-sorted") { + val query = testData.select('key, 'value).sort('key.desc).cache() + assert(query.queryExecution.optimizedPlan.isInstanceOf[InMemoryRelation]) + val resorted = query.sort('key.desc) + assert(resorted.queryExecution.optimizedPlan.collect { case s: Sort => s}.isEmpty) + assert(resorted.select('key).collect().map(_.getInt(0)).toSeq == + (1 to 100).reverse) + // with a different order, the sort is needed + val sortedAsc = query.sort('key) + assert(sortedAsc.queryExecution.optimizedPlan.collect { case s: Sort => s}.size == 1) + assert(sortedAsc.select('key).collect().map(_.getInt(0)).toSeq == (1 to 100)) + } + test("PartitioningCollection") { withTempView("normal", "small", "tiny") { testData.createOrReplaceTempView("normal") diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/columnar/InMemoryColumnarQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/columnar/InMemoryColumnarQuerySuite.scala index 26b63e8e8490f..9b7b316211d30 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/columnar/InMemoryColumnarQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/columnar/InMemoryColumnarQuerySuite.scala @@ -22,6 +22,7 @@ import java.sql.{Date, Timestamp} import org.apache.spark.sql.{DataFrame, QueryTest, Row} import org.apache.spark.sql.catalyst.expressions.{AttributeReference, AttributeSet, In} +import org.apache.spark.sql.catalyst.plans.logical.LocalRelation import org.apache.spark.sql.catalyst.plans.physical.HashPartitioning import org.apache.spark.sql.execution.{FilterExec, LocalTableScanExec, WholeStageCodegenExec} import org.apache.spark.sql.functions._ @@ -42,7 +43,7 @@ class InMemoryColumnarQuerySuite extends QueryTest with SharedSQLContext { val storageLevel = MEMORY_ONLY val plan = spark.sessionState.executePlan(data.logicalPlan).sparkPlan val inMemoryRelation = InMemoryRelation(useCompression = true, 5, storageLevel, plan, None, - data.logicalPlan.stats) + data.logicalPlan) assert(inMemoryRelation.cachedColumnBuffers.getStorageLevel == storageLevel) inMemoryRelation.cachedColumnBuffers.collect().head match { @@ -119,7 +120,7 @@ 
class InMemoryColumnarQuerySuite extends QueryTest with SharedSQLContext { test("simple columnar query") { val plan = spark.sessionState.executePlan(testData.logicalPlan).sparkPlan val scan = InMemoryRelation(useCompression = true, 5, MEMORY_ONLY, plan, None, - testData.logicalPlan.stats) + testData.logicalPlan) checkAnswer(scan, testData.collect().toSeq) } @@ -138,7 +139,7 @@ class InMemoryColumnarQuerySuite extends QueryTest with SharedSQLContext { val logicalPlan = testData.select('value, 'key).logicalPlan val plan = spark.sessionState.executePlan(logicalPlan).sparkPlan val scan = InMemoryRelation(useCompression = true, 5, MEMORY_ONLY, plan, None, - logicalPlan.stats) + logicalPlan) checkAnswer(scan, testData.collect().map { case Row(key: Int, value: String) => value -> key @@ -155,7 +156,7 @@ class InMemoryColumnarQuerySuite extends QueryTest with SharedSQLContext { test("SPARK-1436 regression: in-memory columns must be able to be accessed multiple times") { val plan = spark.sessionState.executePlan(testData.logicalPlan).sparkPlan val scan = InMemoryRelation(useCompression = true, 5, MEMORY_ONLY, plan, None, - testData.logicalPlan.stats) + testData.logicalPlan) checkAnswer(scan, testData.collect().toSeq) checkAnswer(scan, testData.collect().toSeq) @@ -329,7 +330,7 @@ class InMemoryColumnarQuerySuite extends QueryTest with SharedSQLContext { test("SPARK-17549: cached table size should be correctly calculated") { val data = spark.sparkContext.parallelize(1 to 10, 5).toDF() val plan = spark.sessionState.executePlan(data.logicalPlan).sparkPlan - val cached = InMemoryRelation(true, 5, MEMORY_ONLY, plan, None, data.logicalPlan.stats) + val cached = InMemoryRelation(true, 5, MEMORY_ONLY, plan, None, data.logicalPlan) // Materialize the data. val expectedAnswer = data.collect() @@ -455,7 +456,8 @@ class InMemoryColumnarQuerySuite extends QueryTest with SharedSQLContext { test("SPARK-22249: buildFilter should not throw exception when In contains an empty list") { val attribute = AttributeReference("a", IntegerType)() val localTableScanExec = LocalTableScanExec(Seq(attribute), Nil) - val testRelation = InMemoryRelation(false, 1, MEMORY_ONLY, localTableScanExec, None, null) + val testRelation = InMemoryRelation(false, 1, MEMORY_ONLY, localTableScanExec, None, + LocalRelation(Seq(attribute), Nil)) val tableScanExec = InMemoryTableScanExec(Seq(attribute), Seq(In(attribute, Nil)), testRelation) assert(tableScanExec.partitionFilters.isEmpty) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala index 4304d0b6f6b16..cbd7f9d6f67be 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala @@ -425,6 +425,15 @@ abstract class DDLSuite extends QueryTest with SQLTestUtils { sql(s"CREATE TABLE tab1 (col1 int, col2 string) USING ${dataSource}") }.getMessage assert(ex.contains(exMsgWithDefaultDB)) + + // Always check location of managed table, with or without (IF NOT EXISTS) + withTable("tab2") { + sql(s"CREATE TABLE tab2 (col1 int, col2 string) USING ${dataSource}") + ex = intercept[AnalysisException] { + sql(s"CREATE TABLE IF NOT EXISTS tab1 LIKE tab2") + }.getMessage + assert(ex.contains(exMsgWithDefaultDB)) + } } finally { waitForTasksToFinish() Utils.deleteRecursively(tableLoc) diff --git 
a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/FileSourceStrategySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/FileSourceStrategySuite.scala index c1d61b843d899..8764f0c42cf9f 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/FileSourceStrategySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/FileSourceStrategySuite.scala @@ -401,7 +401,7 @@ class FileSourceStrategySuite extends QueryTest with SharedSQLContext with Predi sparkSession = spark, rootPathsSpecified = Seq(new Path(tempDir)), parameters = Map.empty[String, String], - partitionSchema = None) + userSpecifiedSchema = None) // This should not fail. fileCatalog.listLeafFiles(Seq(new Path(tempDir))) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/metric/SQLMetricsTestUtils.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/metric/SQLMetricsTestUtils.scala index 534d8bb629b8c..dcc540fc4f109 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/metric/SQLMetricsTestUtils.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/metric/SQLMetricsTestUtils.scala @@ -34,6 +34,7 @@ trait SQLMetricsTestUtils extends SQLTestUtils { import testImplicits._ protected def currentExecutionIds(): Set[Long] = { + spark.sparkContext.listenerBus.waitUntilEmpty(10000) statusStore.executionsList.map(_.executionId).toSet } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/CheckpointFileManagerSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/CheckpointFileManagerSuite.scala new file mode 100644 index 0000000000000..fe59cb25d5005 --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/CheckpointFileManagerSuite.scala @@ -0,0 +1,192 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.spark.sql.execution.streaming + +import java.io._ +import java.net.URI + +import scala.util.Random + +import org.apache.hadoop.conf.Configuration +import org.apache.hadoop.fs._ + +import org.apache.spark.SparkFunSuite +import org.apache.spark.sql.catalyst.util.quietly +import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.test.SharedSparkSession +import org.apache.spark.util.Utils + +abstract class CheckpointFileManagerTests extends SparkFunSuite { + + def createManager(path: Path): CheckpointFileManager + + test("mkdirs, list, createAtomic, open, delete, exists") { + withTempPath { p => + val basePath = new Path(p.getAbsolutePath) + val fm = createManager(basePath) + // Mkdirs + val dir = new Path(s"$basePath/dir/subdir/subsubdir") + assert(!fm.exists(dir)) + fm.mkdirs(dir) + assert(fm.exists(dir)) + fm.mkdirs(dir) + + // List + val acceptAllFilter = new PathFilter { + override def accept(path: Path): Boolean = true + } + val rejectAllFilter = new PathFilter { + override def accept(path: Path): Boolean = false + } + assert(fm.list(basePath, acceptAllFilter).exists(_.getPath.getName == "dir")) + assert(fm.list(basePath, rejectAllFilter).length === 0) + + // Create atomic without overwrite + var path = new Path(s"$dir/file") + assert(!fm.exists(path)) + fm.createAtomic(path, overwriteIfPossible = false).cancel() + assert(!fm.exists(path)) + fm.createAtomic(path, overwriteIfPossible = false).close() + assert(fm.exists(path)) + quietly { + intercept[IOException] { + // should throw exception since file exists and overwrite is false + fm.createAtomic(path, overwriteIfPossible = false).close() + } + } + + // Create atomic with overwrite if possible + path = new Path(s"$dir/file2") + assert(!fm.exists(path)) + fm.createAtomic(path, overwriteIfPossible = true).cancel() + assert(!fm.exists(path)) + fm.createAtomic(path, overwriteIfPossible = true).close() + assert(fm.exists(path)) + fm.createAtomic(path, overwriteIfPossible = true).close() // should not throw exception + + // Open and delete + fm.open(path).close() + fm.delete(path) + assert(!fm.exists(path)) + intercept[IOException] { + fm.open(path) + } + fm.delete(path) // should not throw exception + } + } + + protected def withTempPath(f: File => Unit): Unit = { + val path = Utils.createTempDir() + path.delete() + try f(path) finally Utils.deleteRecursively(path) + } +} + +class CheckpointFileManagerSuite extends SparkFunSuite with SharedSparkSession { + + test("CheckpointFileManager.create() should pick up user-specified class from conf") { + withSQLConf( + SQLConf.STREAMING_CHECKPOINT_FILE_MANAGER_CLASS.parent.key -> + classOf[CreateAtomicTestManager].getName) { + val fileManager = + CheckpointFileManager.create(new Path("/"), spark.sessionState.newHadoopConf) + assert(fileManager.isInstanceOf[CreateAtomicTestManager]) + } + } + + test("CheckpointFileManager.create() should fallback from FileContext to FileSystem") { + import CheckpointFileManagerSuiteFileSystem.scheme + spark.conf.set(s"fs.$scheme.impl", classOf[CheckpointFileManagerSuiteFileSystem].getName) + quietly { + withTempDir { temp => + val metadataLog = new HDFSMetadataLog[String](spark, s"$scheme://${temp.toURI.getPath}") + assert(metadataLog.add(0, "batch0")) + assert(metadataLog.getLatest() === Some(0 -> "batch0")) + assert(metadataLog.get(0) === Some("batch0")) + assert(metadataLog.get(None, Some(0)) === Array(0 -> "batch0")) + + val metadataLog2 = new HDFSMetadataLog[String](spark, s"$scheme://${temp.toURI.getPath}") + 
assert(metadataLog2.get(0) === Some("batch0")) + assert(metadataLog2.getLatest() === Some(0 -> "batch0")) + assert(metadataLog2.get(None, Some(0)) === Array(0 -> "batch0")) + } + } + } +} + +class FileContextBasedCheckpointFileManagerSuite extends CheckpointFileManagerTests { + override def createManager(path: Path): CheckpointFileManager = { + new FileContextBasedCheckpointFileManager(path, new Configuration()) + } +} + +class FileSystemBasedCheckpointFileManagerSuite extends CheckpointFileManagerTests { + override def createManager(path: Path): CheckpointFileManager = { + new FileSystemBasedCheckpointFileManager(path, new Configuration()) + } +} + + +/** A fake implementation to test different characteristics of CheckpointFileManager interface */ +class CreateAtomicTestManager(path: Path, hadoopConf: Configuration) + extends FileSystemBasedCheckpointFileManager(path, hadoopConf) { + + import CheckpointFileManager._ + + override def createAtomic(path: Path, overwrite: Boolean): CancellableFSDataOutputStream = { + if (CreateAtomicTestManager.shouldFailInCreateAtomic) { + CreateAtomicTestManager.cancelCalledInCreateAtomic = false + } + val originalOut = super.createAtomic(path, overwrite) + + new CancellableFSDataOutputStream(originalOut) { + override def close(): Unit = { + if (CreateAtomicTestManager.shouldFailInCreateAtomic) { + throw new IOException("Copy failed intentionally") + } + super.close() + } + + override def cancel(): Unit = { + CreateAtomicTestManager.cancelCalledInCreateAtomic = true + originalOut.cancel() + } + } + } +} + +object CreateAtomicTestManager { + @volatile var shouldFailInCreateAtomic = false + @volatile var cancelCalledInCreateAtomic = false +} + + +/** + * CheckpointFileManagerSuiteFileSystem to test fallback of the CheckpointFileManager + * from FileContext to FileSystem API. 
+ */ +private class CheckpointFileManagerSuiteFileSystem extends RawLocalFileSystem { + import CheckpointFileManagerSuiteFileSystem.scheme + + override def getUri: URI = { + URI.create(s"$scheme:///") + } +} + +private object CheckpointFileManagerSuiteFileSystem { + val scheme = s"CheckpointFileManagerSuiteFileSystem${math.abs(Random.nextInt)}" +} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/CompactibleFileStreamLogSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/CompactibleFileStreamLogSuite.scala index 12eaf63415081..ec961a9ecb592 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/CompactibleFileStreamLogSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/CompactibleFileStreamLogSuite.scala @@ -22,15 +22,10 @@ import java.nio.charset.StandardCharsets._ import org.apache.spark.{SparkConf, SparkFunSuite} import org.apache.spark.sql.SparkSession -import org.apache.spark.sql.execution.streaming.FakeFileSystem._ import org.apache.spark.sql.test.SharedSQLContext class CompactibleFileStreamLogSuite extends SparkFunSuite with SharedSQLContext { - /** To avoid caching of FS objects */ - override protected def sparkConf = - super.sparkConf.set(s"spark.hadoop.fs.$scheme.impl.disable.cache", "true") - import CompactibleFileStreamLog._ /** -- testing of `object CompactibleFileStreamLog` begins -- */ diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/HDFSMetadataLogSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/HDFSMetadataLogSuite.scala index 4677769c12a35..9268306ce4275 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/HDFSMetadataLogSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/HDFSMetadataLogSuite.scala @@ -17,46 +17,22 @@ package org.apache.spark.sql.execution.streaming -import java.io.{File, FileNotFoundException, IOException} -import java.net.URI +import java.io.File import java.util.ConcurrentModificationException import scala.language.implicitConversions -import scala.util.Random -import org.apache.hadoop.conf.Configuration -import org.apache.hadoop.fs._ import org.scalatest.concurrent.Waiters._ import org.scalatest.time.SpanSugar._ import org.apache.spark.SparkFunSuite -import org.apache.spark.sql.execution.streaming.FakeFileSystem._ -import org.apache.spark.sql.execution.streaming.HDFSMetadataLog.{FileContextManager, FileManager, FileSystemManager} import org.apache.spark.sql.test.SharedSQLContext import org.apache.spark.util.UninterruptibleThread class HDFSMetadataLogSuite extends SparkFunSuite with SharedSQLContext { - /** To avoid caching of FS objects */ - override protected def sparkConf = - super.sparkConf.set(s"spark.hadoop.fs.$scheme.impl.disable.cache", "true") - private implicit def toOption[A](a: A): Option[A] = Option(a) - test("FileManager: FileContextManager") { - withTempDir { temp => - val path = new Path(temp.getAbsolutePath) - testFileManager(path, new FileContextManager(path, new Configuration)) - } - } - - test("FileManager: FileSystemManager") { - withTempDir { temp => - val path = new Path(temp.getAbsolutePath) - testFileManager(path, new FileSystemManager(path, new Configuration)) - } - } - test("HDFSMetadataLog: basic") { withTempDir { temp => val dir = new File(temp, "dir") // use non-existent directory to test whether log make the dir @@ -82,26 +58,6 @@ class HDFSMetadataLogSuite extends SparkFunSuite with SharedSQLContext { 
} } - testQuietly("HDFSMetadataLog: fallback from FileContext to FileSystem") { - spark.conf.set( - s"fs.$scheme.impl", - classOf[FakeFileSystem].getName) - withTempDir { temp => - val metadataLog = new HDFSMetadataLog[String](spark, s"$scheme://${temp.toURI.getPath}") - assert(metadataLog.add(0, "batch0")) - assert(metadataLog.getLatest() === Some(0 -> "batch0")) - assert(metadataLog.get(0) === Some("batch0")) - assert(metadataLog.get(None, Some(0)) === Array(0 -> "batch0")) - - - val metadataLog2 = new HDFSMetadataLog[String](spark, s"$scheme://${temp.toURI.getPath}") - assert(metadataLog2.get(0) === Some("batch0")) - assert(metadataLog2.getLatest() === Some(0 -> "batch0")) - assert(metadataLog2.get(None, Some(0)) === Array(0 -> "batch0")) - - } - } - test("HDFSMetadataLog: purge") { withTempDir { temp => val metadataLog = new HDFSMetadataLog[String](spark, temp.getAbsolutePath) @@ -121,7 +77,8 @@ class HDFSMetadataLogSuite extends SparkFunSuite with SharedSQLContext { // There should be exactly one file, called "2", in the metadata directory. // This check also tests for regressions of SPARK-17475 - val allFiles = new File(metadataLog.metadataPath.toString).listFiles().toSeq + val allFiles = new File(metadataLog.metadataPath.toString).listFiles() + .filter(!_.getName.startsWith(".")).toSeq assert(allFiles.size == 1) assert(allFiles(0).getName() == "2") } @@ -172,7 +129,7 @@ class HDFSMetadataLogSuite extends SparkFunSuite with SharedSQLContext { } } - test("HDFSMetadataLog: metadata directory collision") { + testQuietly("HDFSMetadataLog: metadata directory collision") { withTempDir { temp => val waiter = new Waiter val maxBatchId = 100 @@ -206,60 +163,6 @@ class HDFSMetadataLogSuite extends SparkFunSuite with SharedSQLContext { } } - /** Basic test case for [[FileManager]] implementation. 
*/ - private def testFileManager(basePath: Path, fm: FileManager): Unit = { - // Mkdirs - val dir = new Path(s"$basePath/dir/subdir/subsubdir") - assert(!fm.exists(dir)) - fm.mkdirs(dir) - assert(fm.exists(dir)) - fm.mkdirs(dir) - - // List - val acceptAllFilter = new PathFilter { - override def accept(path: Path): Boolean = true - } - val rejectAllFilter = new PathFilter { - override def accept(path: Path): Boolean = false - } - assert(fm.list(basePath, acceptAllFilter).exists(_.getPath.getName == "dir")) - assert(fm.list(basePath, rejectAllFilter).length === 0) - - // Create - val path = new Path(s"$dir/file") - assert(!fm.exists(path)) - fm.create(path).close() - assert(fm.exists(path)) - intercept[IOException] { - fm.create(path) - } - - // Open and delete - fm.open(path).close() - fm.delete(path) - assert(!fm.exists(path)) - intercept[IOException] { - fm.open(path) - } - fm.delete(path) // should not throw exception - - // Rename - val path1 = new Path(s"$dir/file1") - val path2 = new Path(s"$dir/file2") - fm.create(path1).close() - assert(fm.exists(path1)) - fm.rename(path1, path2) - intercept[FileNotFoundException] { - fm.rename(path1, path2) - } - val path3 = new Path(s"$dir/file3") - fm.create(path3).close() - assert(fm.exists(path3)) - intercept[FileAlreadyExistsException] { - fm.rename(path2, path3) - } - } - test("verifyBatchIds") { import HDFSMetadataLog.verifyBatchIds verifyBatchIds(Seq(1L, 2L, 3L), Some(1L), Some(3L)) @@ -277,14 +180,3 @@ class HDFSMetadataLogSuite extends SparkFunSuite with SharedSQLContext { intercept[IllegalStateException](verifyBatchIds(Seq(1, 2, 4, 5), Some(1L), Some(5L))) } } - -/** FakeFileSystem to test fallback of the HDFSMetadataLog from FileContext to FileSystem API */ -class FakeFileSystem extends RawLocalFileSystem { - override def getUri: URI = { - URI.create(s"$scheme:///") - } -} - -object FakeFileSystem { - val scheme = s"HDFSMetadataLogSuite${math.abs(Random.nextInt)}" -} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/StateStoreSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/StateStoreSuite.scala index c843b65020d8c..73f8705060402 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/StateStoreSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/StateStoreSuite.scala @@ -20,7 +20,6 @@ package org.apache.spark.sql.execution.streaming.state import java.io.{File, IOException} import java.net.URI import java.util.UUID -import java.util.concurrent.ConcurrentHashMap import scala.collection.JavaConverters._ import scala.collection.mutable @@ -28,17 +27,17 @@ import scala.util.Random import org.apache.commons.io.FileUtils import org.apache.hadoop.conf.Configuration -import org.apache.hadoop.fs.{FileStatus, Path, RawLocalFileSystem} +import org.apache.hadoop.fs._ import org.scalatest.{BeforeAndAfter, PrivateMethodTester} import org.scalatest.concurrent.Eventually._ import org.scalatest.time.SpanSugar._ -import org.apache.spark.{SparkConf, SparkContext, SparkEnv, SparkFunSuite} +import org.apache.spark._ import org.apache.spark.LocalSparkContext._ import org.apache.spark.sql.SparkSession import org.apache.spark.sql.catalyst.expressions.{GenericInternalRow, UnsafeProjection, UnsafeRow} import org.apache.spark.sql.catalyst.util.quietly -import org.apache.spark.sql.execution.streaming.{MemoryStream, StreamingQueryWrapper} +import org.apache.spark.sql.execution.streaming._ import org.apache.spark.sql.functions.count 
import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types._ @@ -138,7 +137,7 @@ class StateStoreSuite extends StateStoreSuiteBase[HDFSBackedStateStoreProvider] assert(getData(provider, 19) === Set("a" -> 19)) } - test("SPARK-19677: Committing a delta file atop an existing one should not fail on HDFS") { + testQuietly("SPARK-19677: Committing a delta file atop an existing one should not fail on HDFS") { val conf = new Configuration() conf.set("fs.fake.impl", classOf[RenameLikeHDFSFileSystem].getName) conf.set("fs.defaultFS", "fake:///") @@ -344,7 +343,7 @@ class StateStoreSuite extends StateStoreSuiteBase[HDFSBackedStateStoreProvider] } } - test("SPARK-18342: commit fails when rename fails") { + testQuietly("SPARK-18342: commit fails when rename fails") { import RenameReturnsFalseFileSystem._ val dir = scheme + "://" + newDir() val conf = new Configuration() @@ -366,7 +365,7 @@ class StateStoreSuite extends StateStoreSuiteBase[HDFSBackedStateStoreProvider] def numTempFiles: Int = { if (deltaFileDir.exists) { - deltaFileDir.listFiles.map(_.getName).count(n => n.contains("temp") && !n.startsWith(".")) + deltaFileDir.listFiles.map(_.getName).count(n => n.endsWith(".tmp")) } else 0 } @@ -471,6 +470,43 @@ class StateStoreSuite extends StateStoreSuiteBase[HDFSBackedStateStoreProvider] } } + test("error writing [version].delta cancels the output stream") { + + val hadoopConf = new Configuration() + hadoopConf.set( + SQLConf.STREAMING_CHECKPOINT_FILE_MANAGER_CLASS.parent.key, + classOf[CreateAtomicTestManager].getName) + val remoteDir = Utils.createTempDir().getAbsolutePath + + val provider = newStoreProvider( + opId = Random.nextInt, partition = 0, dir = remoteDir, hadoopConf = hadoopConf) + + // Disable failure of output stream and generate versions + CreateAtomicTestManager.shouldFailInCreateAtomic = false + for (version <- 1 to 10) { + val store = provider.getStore(version - 1) + put(store, version.toString, version) // update "1" -> 1, "2" -> 2, ... 
+      store.commit()
+    }
+    val version10Data = (1L to 10).map(_.toString).map(x => x -> x).toSet
+
+    CreateAtomicTestManager.cancelCalledInCreateAtomic = false
+    val store = provider.getStore(10)
+    // Fail commit for the next version and verify that the failure surfaces as an IOException
+    CreateAtomicTestManager.shouldFailInCreateAtomic = true
+    put(store, "11", 11)
+    val e = intercept[IllegalStateException] { quietly { store.commit() } }
+    assert(e.getCause.isInstanceOf[IOException])
+    CreateAtomicTestManager.shouldFailInCreateAtomic = false
+
+    // Abort commit for the next version and verify that the staged output stream is cancelled
+    CreateAtomicTestManager.cancelCalledInCreateAtomic = false
+    val store2 = provider.getStore(10)
+    put(store2, "11", 11)
+    store2.abort()
+    assert(CreateAtomicTestManager.cancelCalledInCreateAtomic)
+  }
+
   override def newStoreProvider(): HDFSBackedStateStoreProvider = {
     newStoreProvider(opId = Random.nextInt(), partition = 0)
   }
@@ -720,6 +756,14 @@ abstract class StateStoreSuiteBase[ProviderClass <: StateStoreProvider]
    * this provider
    */
   def getData(storeProvider: ProviderClass, version: Int): Set[(String, Int)]
+
+  protected def testQuietly(name: String)(f: => Unit): Unit = {
+    test(name) {
+      quietly {
+        f
+      }
+    }
+  }
 }
 
 object StateStoreTestsHelper {
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamTest.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamTest.scala
index 00741d660dd2d..af0268fa47871 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamTest.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamTest.scala
@@ -99,7 +99,7 @@ trait StreamTest extends QueryTest with SharedSQLContext with TimeLimits with Be
    * been processed.
    */
   object AddData {
-    def apply[A](source: MemoryStream[A], data: A*): AddDataMemory[A] =
+    def apply[A](source: MemoryStreamBase[A], data: A*): AddDataMemory[A] =
       AddDataMemory(source, data)
   }
@@ -131,7 +131,7 @@ trait StreamTest extends QueryTest with SharedSQLContext with TimeLimits with Be
     def runAction(): Unit
   }
 
-  case class AddDataMemory[A](source: MemoryStream[A], data: Seq[A]) extends AddData {
+  case class AddDataMemory[A](source: MemoryStreamBase[A], data: Seq[A]) extends AddData {
     override def toString: String = s"AddData to $source: ${data.mkString(",")}"
 
     override def addData(query: Option[StreamExecution]): (BaseStreamingSource, Offset) = {
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/continuous/ContinuousSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/continuous/ContinuousSuite.scala
index f5884b9c8de12..c318b951ff992 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/continuous/ContinuousSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/continuous/ContinuousSuite.scala
@@ -23,6 +23,7 @@ import org.apache.spark.sql._
 import org.apache.spark.sql.execution.datasources.v2.DataSourceV2ScanExec
 import org.apache.spark.sql.execution.streaming._
 import org.apache.spark.sql.execution.streaming.continuous._
+import org.apache.spark.sql.execution.streaming.sources.ContinuousMemoryStream
 import org.apache.spark.sql.functions._
 import org.apache.spark.sql.streaming.{StreamTest, Trigger}
 import org.apache.spark.sql.test.TestSparkSession
@@ -53,32 +54,24 @@ class ContinuousSuiteBase extends StreamTest {
 
   // A continuous trigger that will fire only the initial time for the duration of a test.
   // This allows clean testing with manual epoch advancement.
  protected val longContinuousTrigger = Trigger.Continuous("1 hour")
+
+  override protected val defaultTrigger = Trigger.Continuous(100)
+  override protected val defaultUseV2Sink = true
 }
 
 class ContinuousSuite extends ContinuousSuiteBase {
   import testImplicits._
 
-  test("basic rate source") {
-    val df = spark.readStream
-      .format("rate")
-      .option("numPartitions", "5")
-      .option("rowsPerSecond", "5")
-      .load()
-      .select('value)
+  test("basic") {
+    val input = ContinuousMemoryStream[Int]
 
-    testStream(df, useV2Sink = true)(
-      StartStream(longContinuousTrigger),
-      AwaitEpoch(0),
-      Execute(waitForRateSourceTriggers(_, 2)),
-      IncrementEpoch(),
-      CheckAnswerRowsContains(scala.Range(0, 10).map(Row(_))),
+    testStream(input.toDF())(
+      AddData(input, 0, 1, 2),
+      CheckAnswer(0, 1, 2),
       StopStream,
-      StartStream(longContinuousTrigger),
-      AwaitEpoch(2),
-      Execute(waitForRateSourceTriggers(_, 2)),
-      IncrementEpoch(),
-      CheckAnswerRowsContains(scala.Range(0, 20).map(Row(_))),
-      StopStream)
+      AddData(input, 3, 4, 5),
+      StartStream(),
+      CheckAnswer(0, 1, 2, 3, 4, 5))
   }
 
   test("map") {
@@ -171,6 +164,25 @@ class ContinuousSuite extends ContinuousSuiteBase {
         "Continuous processing does not support current time operations."))
   }
 
+  test("subquery alias") {
+    spark.readStream
+      .format("rate")
+      .option("numPartitions", "5")
+      .option("rowsPerSecond", "5")
+      .load()
+      .createOrReplaceTempView("rate")
+    val test = spark.sql("select value from rate where value > 5")
+
+    testStream(test, useV2Sink = true)(
+      StartStream(longContinuousTrigger),
+      AwaitEpoch(0),
+      Execute(waitForRateSourceTriggers(_, 2)),
+      IncrementEpoch(),
+      Execute(waitForRateSourceTriggers(_, 4)),
+      IncrementEpoch(),
+      CheckAnswerRowsContains(scala.Range(6, 20).map(Row(_))))
+  }
+
   test("repeatedly restart") {
     val df = spark.readStream
       .format("rate")
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/continuous/EpochCoordinatorSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/continuous/EpochCoordinatorSuite.scala
new file mode 100644
index 0000000000000..99e30561f81d5
--- /dev/null
+++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/continuous/EpochCoordinatorSuite.scala
@@ -0,0 +1,224 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ + +package org.apache.spark.sql.streaming.continuous + +import org.mockito.InOrder +import org.mockito.Matchers.{any, eq => eqTo} +import org.mockito.Mockito._ +import org.scalatest.BeforeAndAfterEach +import org.scalatest.mockito.MockitoSugar + +import org.apache.spark._ +import org.apache.spark.rpc.RpcEndpointRef +import org.apache.spark.sql.LocalSparkSession +import org.apache.spark.sql.execution.streaming.continuous._ +import org.apache.spark.sql.sources.v2.reader.streaming.{ContinuousReader, PartitionOffset} +import org.apache.spark.sql.sources.v2.writer.WriterCommitMessage +import org.apache.spark.sql.sources.v2.writer.streaming.StreamWriter +import org.apache.spark.sql.test.TestSparkSession + +class EpochCoordinatorSuite + extends SparkFunSuite + with LocalSparkSession + with MockitoSugar + with BeforeAndAfterEach { + + private var epochCoordinator: RpcEndpointRef = _ + + private var writer: StreamWriter = _ + private var query: ContinuousExecution = _ + private var orderVerifier: InOrder = _ + + override def beforeEach(): Unit = { + val reader = mock[ContinuousReader] + writer = mock[StreamWriter] + query = mock[ContinuousExecution] + orderVerifier = inOrder(writer, query) + + spark = new TestSparkSession() + + epochCoordinator + = EpochCoordinatorRef.create(writer, reader, query, "test", 1, spark, SparkEnv.get) + } + + test("single epoch") { + setWriterPartitions(3) + setReaderPartitions(2) + + commitPartitionEpoch(0, 1) + commitPartitionEpoch(1, 1) + commitPartitionEpoch(2, 1) + reportPartitionOffset(0, 1) + reportPartitionOffset(1, 1) + + // Here and in subsequent tests this is called to make a synchronous call to EpochCoordinator + // so that mocks would have been acted upon by the time verification happens + makeSynchronousCall() + + verifyCommit(1) + } + + test("single epoch, all but one writer partition has committed") { + setWriterPartitions(3) + setReaderPartitions(2) + + commitPartitionEpoch(0, 1) + commitPartitionEpoch(1, 1) + reportPartitionOffset(0, 1) + reportPartitionOffset(1, 1) + + makeSynchronousCall() + + verifyNoCommitFor(1) + } + + test("single epoch, all but one reader partition has reported an offset") { + setWriterPartitions(3) + setReaderPartitions(2) + + commitPartitionEpoch(0, 1) + commitPartitionEpoch(1, 1) + commitPartitionEpoch(2, 1) + reportPartitionOffset(0, 1) + + makeSynchronousCall() + + verifyNoCommitFor(1) + } + + test("consequent epochs, messages for epoch (k + 1) arrive after messages for epoch k") { + setWriterPartitions(2) + setReaderPartitions(2) + + commitPartitionEpoch(0, 1) + commitPartitionEpoch(1, 1) + reportPartitionOffset(0, 1) + reportPartitionOffset(1, 1) + + commitPartitionEpoch(0, 2) + commitPartitionEpoch(1, 2) + reportPartitionOffset(0, 2) + reportPartitionOffset(1, 2) + + makeSynchronousCall() + + verifyCommitsInOrderOf(List(1, 2)) + } + + ignore("consequent epochs, a message for epoch k arrives after messages for epoch (k + 1)") { + setWriterPartitions(2) + setReaderPartitions(2) + + commitPartitionEpoch(0, 1) + commitPartitionEpoch(1, 1) + reportPartitionOffset(0, 1) + + commitPartitionEpoch(0, 2) + commitPartitionEpoch(1, 2) + reportPartitionOffset(0, 2) + reportPartitionOffset(1, 2) + + // Message that arrives late + reportPartitionOffset(1, 1) + + makeSynchronousCall() + + verifyCommitsInOrderOf(List(1, 2)) + } + + ignore("several epochs, messages arrive in order 1 -> 3 -> 4 -> 2") { + setWriterPartitions(1) + setReaderPartitions(1) + + commitPartitionEpoch(0, 1) + reportPartitionOffset(0, 1) + + 
commitPartitionEpoch(0, 3) + reportPartitionOffset(0, 3) + + commitPartitionEpoch(0, 4) + reportPartitionOffset(0, 4) + + commitPartitionEpoch(0, 2) + reportPartitionOffset(0, 2) + + makeSynchronousCall() + + verifyCommitsInOrderOf(List(1, 2, 3, 4)) + } + + ignore("several epochs, messages arrive in order 1 -> 3 -> 5 -> 4 -> 2") { + setWriterPartitions(1) + setReaderPartitions(1) + + commitPartitionEpoch(0, 1) + reportPartitionOffset(0, 1) + + commitPartitionEpoch(0, 3) + reportPartitionOffset(0, 3) + + commitPartitionEpoch(0, 5) + reportPartitionOffset(0, 5) + + commitPartitionEpoch(0, 4) + reportPartitionOffset(0, 4) + + commitPartitionEpoch(0, 2) + reportPartitionOffset(0, 2) + + makeSynchronousCall() + + verifyCommitsInOrderOf(List(1, 2, 3, 4, 5)) + } + + private def setWriterPartitions(numPartitions: Int): Unit = { + epochCoordinator.askSync[Unit](SetWriterPartitions(numPartitions)) + } + + private def setReaderPartitions(numPartitions: Int): Unit = { + epochCoordinator.askSync[Unit](SetReaderPartitions(numPartitions)) + } + + private def commitPartitionEpoch(partitionId: Int, epoch: Long): Unit = { + val dummyMessage: WriterCommitMessage = mock[WriterCommitMessage] + epochCoordinator.send(CommitPartitionEpoch(partitionId, epoch, dummyMessage)) + } + + private def reportPartitionOffset(partitionId: Int, epoch: Long): Unit = { + val dummyOffset: PartitionOffset = mock[PartitionOffset] + epochCoordinator.send(ReportPartitionOffset(partitionId, epoch, dummyOffset)) + } + + private def makeSynchronousCall(): Unit = { + epochCoordinator.askSync[Long](GetCurrentEpoch) + } + + private def verifyCommit(epoch: Long): Unit = { + orderVerifier.verify(writer).commit(eqTo(epoch), any()) + orderVerifier.verify(query).commit(epoch) + } + + private def verifyNoCommitFor(epoch: Long): Unit = { + verify(writer, never()).commit(eqTo(epoch), any()) + verify(query, never()).commit(epoch) + } + + private def verifyCommitsInOrderOf(epochs: Seq[Long]): Unit = { + epochs.foreach(verifyCommit) + } +} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/test/SharedSparkSession.scala b/sql/core/src/test/scala/org/apache/spark/sql/test/SharedSparkSession.scala index e758c865b908f..8968dbf36d507 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/test/SharedSparkSession.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/test/SharedSparkSession.scala @@ -60,6 +60,7 @@ trait SharedSparkSession protected implicit def sqlContext: SQLContext = _spark.sqlContext protected def createSparkSession: TestSparkSession = { + SparkSession.cleanupAnyExistingSession() new TestSparkSession(sparkConf) } @@ -92,11 +93,22 @@ trait SharedSparkSession * Stop the underlying [[org.apache.spark.SparkContext]], if any. 
*/ protected override def afterAll(): Unit = { - super.afterAll() - if (_spark != null) { - _spark.sessionState.catalog.reset() - _spark.stop() - _spark = null + try { + super.afterAll() + } finally { + try { + if (_spark != null) { + try { + _spark.sessionState.catalog.reset() + } finally { + _spark.stop() + _spark = null + } + } + } finally { + SparkSession.clearActiveSession() + SparkSession.clearDefaultSession() + } } } diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/TableReader.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/TableReader.scala index cc8907a0bbc93..b5444a4217924 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/TableReader.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/TableReader.scala @@ -381,7 +381,7 @@ private[hive] object HadoopTableReader extends HiveInspectors with Logging { val (fieldRefs, fieldOrdinals) = nonPartitionKeyAttrs.map { case (attr, ordinal) => soi.getStructFieldRef(attr.name) -> ordinal - }.unzip + }.toArray.unzip /** * Builds specific unwrappers ahead of time according to object inspector diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/PartitionedTablePerfStatsSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/PartitionedTablePerfStatsSuite.scala index 1a86c604d5da3..3af163af0968c 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/PartitionedTablePerfStatsSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/PartitionedTablePerfStatsSuite.scala @@ -419,7 +419,7 @@ class PartitionedTablePerfStatsSuite HiveCatalogMetrics.reset() spark.read.load(dir.getAbsolutePath) assert(HiveCatalogMetrics.METRIC_FILES_DISCOVERED.getCount() == 1) - assert(HiveCatalogMetrics.METRIC_FILE_CACHE_HITS.getCount() == 1) + assert(HiveCatalogMetrics.METRIC_FILE_CACHE_HITS.getCount() == 0) } } } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala index db76ec9d084cb..c85db78c732de 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala @@ -1461,7 +1461,7 @@ class HiveDDLSuite assert(e2.getMessage.contains(forbiddenPrefix + "foo")) val e3 = intercept[AnalysisException] { - sql(s"CREATE TABLE tbl (a INT) TBLPROPERTIES ('${forbiddenPrefix}foo'='anything')") + sql(s"CREATE TABLE tbl2 (a INT) TBLPROPERTIES ('${forbiddenPrefix}foo'='anything')") } assert(e3.getMessage.contains(forbiddenPrefix + "foo")) }
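
Note on the randomized scheme in CheckpointFileManagerSuiteFileSystem above, and on why the per-suite `impl.disable.cache` overrides could be deleted: Hadoop caches FileSystem instances keyed by scheme and authority, so a fixed fake scheme registered in one suite can leak its cached instance into another suite running in the same JVM. A unique scheme per JVM sidesteps the cache instead of disabling it. A minimal sketch of the cache behavior in question, using only stock Hadoop APIs (the `demoscheme` name is illustrative):

import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{FileSystem, Path, RawLocalFileSystem}

object FsCacheDemo extends App {
  val conf = new Configuration()
  // Register a local implementation under an illustrative scheme name.
  conf.set("fs.demoscheme.impl", classOf[RawLocalFileSystem].getName)

  val fs1 = FileSystem.get(new Path("demoscheme:///").toUri, conf)
  val fs2 = FileSystem.get(new Path("demoscheme:///").toUri, conf)
  assert(fs1 eq fs2) // same cached instance: state in fs1 is visible to every user of the scheme
}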
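The new "error writing [version].delta cancels the output stream" test, the switch of `numTempFiles` to counting `.tmp` suffixes, and the hidden-file filter added in HDFSMetadataLogSuite all revolve around one pattern: atomic checkpoint writes stage output in a temporary file that is renamed into place on success and discarded on cancel. The following is a self-contained sketch of that pattern; the class name and shape are illustrative, not the PR's actual checkpoint file manager API:

import java.io.{File, FileOutputStream, IOException}

// Stage writes in a hidden .tmp file; rename on close(), delete on cancel().
class AtomicFileWriter(target: File) {
  private val tmp = new File(target.getParentFile, s".${target.getName}.tmp")
  private val out = new FileOutputStream(tmp)
  private var done = false

  def write(bytes: Array[Byte]): Unit = out.write(bytes)

  // Commit path: an incomplete write never becomes visible under the target name.
  def close(): Unit = if (!done) {
    done = true
    out.close()
    if (!tmp.renameTo(target)) throw new IOException(s"rename $tmp -> $target failed")
  }

  // Cancel path: the StateStoreSuite test above asserts this runs when commit() fails
  // or abort() is called, so no orphaned temp files are left behind.
  def cancel(): Unit = if (!done) {
    done = true
    out.close()
    tmp.delete()
  }
}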
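EpochCoordinatorSuite relies on Mockito's InOrder verification to check that epochs are committed to the writer and the query in sequence. For readers unfamiliar with the idiom, a standalone example using plain Mockito, with no Spark involved (the `Sink` trait is invented for illustration):

import org.mockito.Mockito._

object InOrderDemo extends App {
  trait Sink { def commit(epoch: Long): Unit }

  val sink = mock(classOf[Sink])
  val order = inOrder(sink)

  sink.commit(1)
  sink.commit(2)

  // Passes only if commit(1) happened before commit(2) on this mock.
  order.verify(sink).commit(1)
  order.verify(sink).commit(2)
  verify(sink, never()).commit(3) // and commit(3) never happened at all
}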
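The suite's `makeSynchronousCall` exploits a property of RPC endpoints with a single FIFO inbox: by the time a synchronous `askSync` returns, every `send` issued before it has already been processed, so mock verification cannot race with message delivery. A toy model of that guarantee using only the standard library (all names here are illustrative, not Spark's RPC layer):

import java.util.concurrent.LinkedBlockingQueue
import scala.concurrent.{Await, Promise}
import scala.concurrent.duration._

object FlushByAskDemo extends App {
  sealed trait Msg
  final case class Send(body: String) extends Msg
  final case class Ask(reply: Promise[Unit]) extends Msg

  // Single worker draining one FIFO queue, like an RPC endpoint's inbox.
  val inbox = new LinkedBlockingQueue[Msg]()
  val worker = new Thread(() => while (true) inbox.take() match {
    case Send(body) => println(s"processed $body")
    case Ask(reply) => reply.success(())
  })
  worker.setDaemon(true)
  worker.start()

  inbox.put(Send("CommitPartitionEpoch"))
  inbox.put(Send("ReportPartitionOffset"))

  val done = Promise[Unit]()
  inbox.put(Ask(done))
  Await.result(done.future, 10.seconds)
  // Reaching this line implies both Send messages above were already handled.
}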
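The rewritten SharedSparkSession.afterAll follows a general teardown rule: every cleanup step must run even when an earlier one throws, which the nested try/finally blocks guarantee. The same shape factored into a reusable helper, as a sketch rather than anything this PR adds:

object CleanupDemo {
  // Runs every step even if earlier ones throw; per JVM try/finally semantics, an
  // exception thrown by a later step supersedes one thrown by an earlier step.
  def runAllCleanups(steps: (() => Unit)*): Unit =
    steps.toList match {
      case Nil => ()
      case head :: tail => try head() finally runAllCleanups(tail: _*)
    }
}

// Usage mirroring the afterAll() above (hypothetical call sites):
//   CleanupDemo.runAllCleanups(
//     () => superAfterAll(),
//     () => if (spark != null) { try resetCatalog() finally stopSession() },
//     () => clearActiveSession(),
//     () => clearDefaultSession())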
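Finally, on the retained "verifyBatchIds" test: from the assertions it makes, the contract appears to be that the supplied batch ids must form exactly the contiguous range implied by the optional start and end bounds, throwing IllegalStateException otherwise. A hedged reconstruction of that assumed contract, not the actual HDFSMetadataLog.verifyBatchIds implementation:

object VerifyBatchIdsSketch {
  def verifyContiguousBatchIds(ids: Seq[Long], start: Option[Long], end: Option[Long]): Unit = {
    def fail(msg: String) = throw new IllegalStateException(msg)
    val sorted = ids.sorted
    if (sorted.isEmpty) {
      if (start.isDefined || end.isDefined) fail("expected batch ids but got none")
    } else {
      start.foreach(s => if (sorted.head != s) fail(s"expected first id $s, got ${sorted.head}"))
      end.foreach(e => if (sorted.last != e) fail(s"expected last id $e, got ${sorted.last}"))
      // A contiguous range of n distinct ids spans exactly n values.
      if (sorted.last - sorted.head + 1 != sorted.size) fail(s"ids are not contiguous: $sorted")
    }
  }
}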