From e763252854c8c2321d85c11fdd86057e46a6dbd8 Mon Sep 17 00:00:00 2001
From: Johannes Scheuermann
Date: Fri, 9 Jun 2017 00:33:40 +0200
Subject: [PATCH] Create base-image and minimize layer count (#324)

* Create base-image and minimize layer count

* Create running-on-kubernetes.md
---
 docs/running-on-kubernetes.md                  |  6 +++-
 .../src/main/docker/driver/Dockerfile          | 17 ++-------
 .../src/main/docker/executor/Dockerfile        | 17 ++-------
 .../src/main/docker/init-container/Dockerfile  | 16 +--------
 .../docker/resource-staging-server/Dockerfile  | 16 +--------
 .../main/docker/shuffle-service/Dockerfile     | 17 ++-------
 .../src/main/docker/spark-base/Dockerfile      | 35 +++++++++++++++++++
 .../docker/SparkDockerImageBuilder.scala       |  2 ++
 8 files changed, 50 insertions(+), 76 deletions(-)
 create mode 100644 resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/spark-base/Dockerfile

diff --git a/docs/running-on-kubernetes.md b/docs/running-on-kubernetes.md
index dc3cf738832ad..c10630fc5c5c6 100644
--- a/docs/running-on-kubernetes.md
+++ b/docs/running-on-kubernetes.md
@@ -49,7 +49,7 @@ If you wish to use pre-built docker images, you may use the images published in
 
 You may also build these docker images from sources, or customize them as required. Spark distributions include the
-Docker files for the driver, executor, and init-container at `dockerfiles/driver/Dockerfile`,
+Docker files for the base image, driver, executor, and init-container at `dockerfiles/spark-base/Dockerfile`, `dockerfiles/driver/Dockerfile`,
 `dockerfiles/executor/Dockerfile`, and `dockerfiles/init-container/Dockerfile` respectively. Use these Docker files to
 build the Docker images, and then tag them with the registry that the images should be sent to. Finally, push the images
 to the registry.
@@ -57,12 +57,16 @@ to the registry.
 For example, if the registry host is `registry-host` and the registry is listening on port 5000:
 
     cd $SPARK_HOME
+    docker build -t registry-host:5000/spark-base:latest -f dockerfiles/spark-base/Dockerfile .
     docker build -t registry-host:5000/spark-driver:latest -f dockerfiles/driver/Dockerfile .
     docker build -t registry-host:5000/spark-executor:latest -f dockerfiles/executor/Dockerfile .
     docker build -t registry-host:5000/spark-init:latest -f dockerfiles/init-container/Dockerfile .
+    docker push registry-host:5000/spark-base:latest
     docker push registry-host:5000/spark-driver:latest
     docker push registry-host:5000/spark-executor:latest
     docker push registry-host:5000/spark-init:latest
+
+Note that `spark-base` is the base image for the other images and must therefore be built first; once it exists, the remaining images can be built in any order.
 
 ## Submitting Applications to Kubernetes
diff --git a/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/driver/Dockerfile b/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/driver/Dockerfile
index fa651ff43aaa0..6bbff8ef64a0f 100644
--- a/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/driver/Dockerfile
+++ b/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/driver/Dockerfile
@@ -15,26 +15,13 @@
 # limitations under the License.
 #
 
-FROM openjdk:8-alpine
+FROM spark-base
 
 # If this docker file is being used in the context of building your images from a Spark distribution, the docker build
 # command should be invoked from the top level directory of the Spark distribution. E.g.:
 # docker build -t spark-driver:latest -f dockerfiles/driver/Dockerfile .
 
-RUN apk upgrade --update
-RUN apk add --update bash tini
-RUN mkdir -p /opt/spark
-RUN touch /opt/spark/RELEASE
-
-ADD jars /opt/spark/jars
-ADD examples /opt/spark/examples
-ADD bin /opt/spark/bin
-ADD sbin /opt/spark/sbin
-ADD conf /opt/spark/conf
-
-ENV SPARK_HOME /opt/spark
-
-WORKDIR /opt/spark
+COPY examples /opt/spark/examples
 
 CMD SPARK_CLASSPATH="${SPARK_HOME}/jars/*" && \
     if ! [ -z ${SPARK_MOUNTED_CLASSPATH+x} ]; then SPARK_CLASSPATH="$SPARK_MOUNTED_CLASSPATH:$SPARK_CLASSPATH"; fi && \
diff --git a/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/executor/Dockerfile b/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/executor/Dockerfile
index fbad43b6255b9..9c9efb23d7e95 100644
--- a/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/executor/Dockerfile
+++ b/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/executor/Dockerfile
@@ -15,26 +15,13 @@
 # limitations under the License.
 #
 
-FROM openjdk:8-alpine
+FROM spark-base
 
 # If this docker file is being used in the context of building your images from a Spark distribution, the docker build
 # command should be invoked from the top level directory of the Spark distribution. E.g.:
 # docker build -t spark-executor:latest -f dockerfiles/executor/Dockerfile .
 
-RUN apk upgrade --update
-RUN apk add --update bash tini
-RUN mkdir -p /opt/spark
-RUN touch /opt/spark/RELEASE
-
-ADD jars /opt/spark/jars
-ADD examples /opt/spark/examples
-ADD bin /opt/spark/bin
-ADD sbin /opt/spark/sbin
-ADD conf /opt/spark/conf
-
-ENV SPARK_HOME /opt/spark
-
-WORKDIR /opt/spark
+COPY examples /opt/spark/examples
 
 # TODO support spark.executor.extraClassPath
 CMD SPARK_CLASSPATH="${SPARK_HOME}/jars/*" && \
diff --git a/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/init-container/Dockerfile b/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/init-container/Dockerfile
index 40557a7465a8a..6bff06da12840 100644
--- a/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/init-container/Dockerfile
+++ b/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/init-container/Dockerfile
@@ -15,24 +15,10 @@
 # limitations under the License.
 #
 
-FROM openjdk:8-alpine
+FROM spark-base
 
 # If this docker file is being used in the context of building your images from a Spark distribution, the docker build
 # command should be invoked from the top level directory of the Spark distribution. E.g.:
 # docker build -t spark-init:latest -f dockerfiles/init-container/Dockerfile .
 
-RUN apk upgrade --update
-RUN apk add --update bash tini
-RUN mkdir -p /opt/spark
-RUN touch /opt/spark/RELEASE
-
-ADD jars /opt/spark/jars
-ADD bin /opt/spark/bin
-ADD sbin /opt/spark/sbin
-ADD conf /opt/spark/conf
-
-ENV SPARK_HOME /opt/spark
-
-WORKDIR /opt/spark
-
 ENTRYPOINT [ "/sbin/tini", "--", "bin/spark-class", "org.apache.spark.deploy.rest.kubernetes.KubernetesSparkDependencyDownloadInitContainer" ]
diff --git a/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/resource-staging-server/Dockerfile b/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/resource-staging-server/Dockerfile
index c8b13c44207bc..c9a92fa1c5b62 100644
--- a/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/resource-staging-server/Dockerfile
+++ b/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/resource-staging-server/Dockerfile
@@ -15,24 +15,10 @@
 # limitations under the License.
 #
 
-FROM openjdk:8-alpine
+FROM spark-base
 
 # If this docker file is being used in the context of building your images from a Spark distribution, the docker build
 # command should be invoked from the top level directory of the Spark distribution. E.g.:
 # docker build -t spark-resource-staging-server:latest -f dockerfiles/resource-staging-server/Dockerfile .
 
-RUN apk upgrade --update
-RUN apk add --update bash tini
-RUN mkdir -p /opt/spark
-RUN touch /opt/spark/RELEASE
-
-ADD jars /opt/spark/jars
-ADD bin /opt/spark/bin
-ADD sbin /opt/spark/sbin
-ADD conf /opt/spark/conf
-
-ENV SPARK_HOME /opt/spark
-
-WORKDIR /opt/spark
-
 ENTRYPOINT [ "/sbin/tini", "--", "bin/spark-class", "org.apache.spark.deploy.rest.kubernetes.ResourceStagingServer" ]
diff --git a/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/shuffle-service/Dockerfile b/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/shuffle-service/Dockerfile
index 1f64376b89aae..7f4e2aa51b67d 100644
--- a/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/shuffle-service/Dockerfile
+++ b/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/shuffle-service/Dockerfile
@@ -15,25 +15,12 @@
 # limitations under the License.
 #
 
-FROM openjdk:8-alpine
+FROM spark-base
 
 # If this docker file is being used in the context of building your images from a Spark distribution, the docker build
 # command should be invoked from the top level directory of the Spark distribution. E.g.:
 # docker build -t spark-shuffle:latest -f dockerfiles/shuffle-service/Dockerfile .
 
-RUN apk upgrade --update
-RUN apk add --update bash tini
-RUN mkdir -p /opt/spark
-RUN touch /opt/spark/RELEASE
-
-ADD jars /opt/spark/jars
-ADD examples /opt/spark/examples
-ADD bin /opt/spark/bin
-ADD sbin /opt/spark/sbin
-ADD conf /opt/spark/conf
-
-ENV SPARK_HOME /opt/spark
-
-WORKDIR /opt/spark
+COPY examples /opt/spark/examples
 
 ENTRYPOINT [ "/sbin/tini", "--", "bin/spark-class", "org.apache.spark.deploy.kubernetes.KubernetesExternalShuffleService", "1" ]
diff --git a/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/spark-base/Dockerfile b/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/spark-base/Dockerfile
new file mode 100644
index 0000000000000..b0925e3bb0416
--- /dev/null
+++ b/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/spark-base/Dockerfile
@@ -0,0 +1,35 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+FROM openjdk:8-alpine
+
+# If this docker file is being used in the context of building your images from a Spark distribution, the docker build
+# command should be invoked from the top level directory of the Spark distribution. E.g.:
+# docker build -t spark-base:latest -f dockerfiles/spark-base/Dockerfile .
+
+RUN apk upgrade --no-cache && \
+    apk add --no-cache bash tini && \
+    mkdir -p /opt/spark && \
+    touch /opt/spark/RELEASE
+
+COPY jars /opt/spark/jars
+COPY bin /opt/spark/bin
+COPY sbin /opt/spark/sbin
+COPY conf /opt/spark/conf
+
+ENV SPARK_HOME /opt/spark
+
+WORKDIR /opt/spark
diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/docker/SparkDockerImageBuilder.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/docker/SparkDockerImageBuilder.scala
index 3ff72829f88a7..4db19478f44bc 100644
--- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/docker/SparkDockerImageBuilder.scala
+++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/docker/SparkDockerImageBuilder.scala
@@ -28,6 +28,7 @@ private[spark] class SparkDockerImageBuilder(private val dockerEnv: Map[String,
 
   private val DOCKER_BUILD_PATH = Paths.get("target", "docker")
   // Dockerfile paths must be relative to the build path.
+  private val BASE_DOCKER_FILE = "dockerfiles/spark-base/Dockerfile"
   private val DRIVER_DOCKER_FILE = "dockerfiles/driver/Dockerfile"
   private val EXECUTOR_DOCKER_FILE = "dockerfiles/executor/Dockerfile"
   private val SHUFFLE_SERVICE_DOCKER_FILE = "dockerfiles/shuffle-service/Dockerfile"
@@ -60,6 +61,7 @@ private[spark] class SparkDockerImageBuilder(private val dockerEnv: Map[String,
 
   def buildSparkDockerImages(): Unit = {
     Eventually.eventually(TIMEOUT, INTERVAL) { dockerClient.ping() }
+    buildImage("spark-base", BASE_DOCKER_FILE)
    buildImage("spark-driver", DRIVER_DOCKER_FILE)
    buildImage("spark-executor", EXECUTOR_DOCKER_FILE)
    buildImage("spark-shuffle", SHUFFLE_SERVICE_DOCKER_FILE)
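
For reference, `buildImage` is an existing private helper in `SparkDockerImageBuilder` that this diff calls but does not show. Below is a minimal sketch of how such a helper can be written against the Spotify docker-client library that these integration tests use; the class name, the `fromEnv()` wiring, and the `buildAll` method are illustrative assumptions, not the repository's exact code:

    import java.nio.file.Paths

    import com.spotify.docker.client.{DefaultDockerClient, DockerClient, LoggingBuildHandler}

    // Illustrative sketch only: the names below are assumptions, while the
    // docker-client calls themselves are real API.
    class DockerImageBuilderSketch {

      // Build context directory; the Dockerfile paths passed to buildImage
      // must be relative to this directory.
      private val dockerBuildPath = Paths.get("target", "docker")

      // Connects to the local Docker daemon using DOCKER_HOST and related
      // environment variables.
      private val dockerClient: DockerClient = DefaultDockerClient.fromEnv().build()

      // Builds one image from the given Dockerfile and tags it with `name`,
      // streaming the daemon's build output to the logs.
      private def buildImage(name: String, dockerFile: String): Unit = {
        dockerClient.build(dockerBuildPath, name, dockerFile, new LoggingBuildHandler())
      }

      // spark-base must be built first: every other Dockerfile in this patch
      // starts with `FROM spark-base`, so that tag has to exist locally.
      def buildAll(): Unit = {
        buildImage("spark-base", "dockerfiles/spark-base/Dockerfile")
        buildImage("spark-driver", "dockerfiles/driver/Dockerfile")
        buildImage("spark-executor", "dockerfiles/executor/Dockerfile")
      }
    }

The ordering in `buildAll` mirrors the note added to the docs above: the `spark-base` tag must exist locally before any image whose Dockerfile begins with `FROM spark-base` can be built.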