Skip to content
This repository has been archived by the owner on Feb 3, 2021. It is now read-only.

Commit

Permalink
Feature: nvBLAS and OpenBLAS plugin (#539)
Browse files Browse the repository at this point in the history
* add openblas plugin, update gpu docker images with netlib-lgpl

* update images and plugins

* add nvblas plugin

* revert gpu docker image change, add -Pnetlib-lgpl to base images

* change configurations to functions, add plugins to cluster.yaml
  • Loading branch information
jafreck authored May 16, 2018
1 parent 94e551c commit 603a413
Show file tree
Hide file tree
Showing 17 changed files with 164 additions and 43 deletions.
2 changes: 2 additions & 0 deletions aztk/models/plugins/internal/plugin_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@ class PluginManager:
hdfs=plugins.HDFSPlugin,
simple=plugins.SimplePlugin,
spark_ui_proxy=plugins.SparkUIProxyPlugin,
openblas=plugins.OpenBLASPlugin,
nvblas=plugins.NvBLASPlugin,
)

def __init__(self):
Expand Down
2 changes: 2 additions & 0 deletions aztk/spark/models/plugins/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,5 @@
from .rstudio_server import RStudioServerPlugin
from .simple import SimplePlugin
from .spark_ui_proxy import SparkUIProxyPlugin
from .openblas import OpenBLASPlugin
from .nvblas import NvBLASPlugin
1 change: 1 addition & 0 deletions aztk/spark/models/plugins/nvblas/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
from .configuration import *
18 changes: 18 additions & 0 deletions aztk/spark/models/plugins/nvblas/configuration.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
import os
from aztk.models.plugins.plugin_configuration import PluginConfiguration, PluginPort, PluginTargetRole
from aztk.models.plugins.plugin_file import PluginFile
from aztk.utils import constants

# Directory containing this module, with symlinks resolved by realpath.
# NvBLASPlugin joins it with "nvblas.sh" to build the plugin script's path.
dir_path = os.path.dirname(os.path.realpath(__file__))


def NvBLASPlugin():
    """Build the plugin configuration for the "nvblas" plugin.

    The configuration declares no ports, targets ``PluginTargetRole.All``,
    and both ships and executes the ``nvblas.sh`` script that sits in the
    same directory as this module.

    Returns:
        PluginConfiguration: the configuration described above.
    """
    script_name = "nvblas.sh"
    # The script is uploaded under its own file name and run as-is.
    script_file = PluginFile(script_name, os.path.join(dir_path, script_name))

    return PluginConfiguration(
        name="nvblas",
        ports=[],
        target_role=PluginTargetRole.All,
        execute=script_name,
        files=[script_file],
    )
65 changes: 65 additions & 0 deletions aztk/spark/models/plugins/nvblas/nvblas.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
#!/bin/bash
# Set up NVBLAS on this node:
#   1. install libblas-dev and liblapack-dev,
#   2. write the NVBLAS configuration file,
#   3. export NVBLAS_CONFIG_FILE, LD_LIBRARY_PATH and LD_PRELOAD, and append
#      the same exports to ~/.bashrc.

# NOTE(review): `update-alternatives --config` is the interactive selector;
# confirm it behaves as intended when this script runs without a terminal.
apt-get update &&
apt-get install -y libblas-dev liblapack-dev &&
update-alternatives --config libblas.so.3
update-alternatives --config liblapack.so.3

export NVBLAS_CONFIG_FILE=/usr/local/cuda/lib64/nvblas.conf
echo "export NVBLAS_CONFIG_FILE=/usr/local/cuda/lib64/nvblas.conf" >> ~/.bashrc

# The single-quoted text below is written verbatim as the NVBLAS config file.
echo '# This is the configuration file to use NVBLAS Library
# Setup the environment variable NVBLAS_CONFIG_FILE to specify your own config file.
# By default, if NVBLAS_CONFIG_FILE is not defined,
# NVBLAS Library will try to open the file "nvblas.conf" in its current directory
# Example : NVBLAS_CONFIG_FILE /home/cuda_user/my_nvblas.conf
# The config file should have restricted write permissions accesses
# Specify which output log file (default is stderr)
NVBLAS_LOGFILE /root/nvblas.log
# Enable trace log of every intercepted BLAS calls
NVBLAS_TRACE_LOG_ENABLED
#Put here the CPU BLAS fallback Library of your choice
#It is strongly advised to use full path to describe the location of the CPU Library
NVBLAS_CPU_BLAS_LIB /usr/lib/libblas.so
# List of GPU devices Id to participate to the computation
# Use ALL if you want all your GPUs to contribute
# Use ALL0, if you want all your GPUs of the same type as device 0 to contribute
# However, NVBLAS consider that all GPU have the same performance and PCI bandwidth
# By default if no GPU are listed, only device 0 will be used
#NVBLAS_GPU_LIST 0 2 4
#NVBLAS_GPU_LIST ALL
NVBLAS_GPU_LIST ALL0
# Tile Dimension
NVBLAS_TILE_DIM 2048
# Autopin Memory
NVBLAS_AUTOPIN_MEM_ENABLED
#List of BLAS routines that are prevented from running on GPU (use for debugging purpose
# The current list of BLAS routines supported by NVBLAS are
# GEMM, SYRK, HERK, TRSM, TRMM, SYMM, HEMM, SYR2K, HER2K
#NVBLAS_GPU_DISABLED_SGEMM
#NVBLAS_GPU_DISABLED_DGEMM
#NVBLAS_GPU_DISABLED_CGEMM
#NVBLAS_GPU_DISABLED_ZGEMM
# Computation can be optionally hybridized between CPU and GPU
# By default, GPU-supported BLAS routines are ran fully on GPU
# The option NVBLAS_CPU_RATIO_<BLAS_ROUTINE> give the ratio [0,1]
# of the amount of computation that should be done on CPU
# CAUTION : this option should be used wisely because it can actually
# significantly reduced the overall performance if too much work is given to CPU
#NVBLAS_CPU_RATIO_CGEMM 0.07' > "$NVBLAS_CONFIG_FILE"

# Extend the library search path, then persist exactly the exported value.
# Re-appending the directories in the echo (after the export has already
# added them) would write them to ~/.bashrc twice.
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/lib/libblas:/usr/local/cuda/lib64
echo "export LD_LIBRARY_PATH=$LD_LIBRARY_PATH" >> ~/.bashrc
# Preload libnvblas.so for this script and, via ~/.bashrc, for later shells.
export LD_PRELOAD=/usr/local/cuda/lib64/libnvblas.so
echo "export LD_PRELOAD=/usr/local/cuda/lib64/libnvblas.so" >> ~/.bashrc
1 change: 1 addition & 0 deletions aztk/spark/models/plugins/openblas/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
from .configuration import *
18 changes: 18 additions & 0 deletions aztk/spark/models/plugins/openblas/configuration.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
import os
from aztk.models.plugins.plugin_configuration import PluginConfiguration, PluginPort, PluginTargetRole
from aztk.models.plugins.plugin_file import PluginFile
from aztk.utils import constants

# Directory containing this module, with symlinks resolved by realpath.
# OpenBLASPlugin joins it with "openblas.sh" to build the plugin script's path.
dir_path = os.path.dirname(os.path.realpath(__file__))


def OpenBLASPlugin():
    """Build the plugin configuration for the "openblas" plugin.

    The configuration declares no ports, targets ``PluginTargetRole.All``,
    and both ships and executes the ``openblas.sh`` script that sits in the
    same directory as this module.

    Returns:
        PluginConfiguration: the configuration described above.
    """
    script_name = "openblas.sh"
    # The script is uploaded under its own file name and run as-is.
    script_file = PluginFile(script_name, os.path.join(dir_path, script_name))

    return PluginConfiguration(
        name="openblas",
        ports=[],
        target_role=PluginTargetRole.All,
        execute=script_name,
        files=[script_file],
    )
4 changes: 4 additions & 0 deletions aztk/spark/models/plugins/openblas/openblas.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
#!/bin/bash
# Install the libopenblas-base package, then run the update-alternatives
# selector for libblas.so.3. Each step runs only if the previous one succeeded.
# NOTE(review): `update-alternatives --config` is the interactive selector;
# confirm it behaves as intended when this script runs without a terminal.
apt-get update \
    && apt-get install -y libopenblas-base \
    && update-alternatives --config libblas.so.3
8 changes: 5 additions & 3 deletions aztk_cli/config/cluster.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
# Toolkit configuration [Required] You can use the `aztk toolkit` command to find which toolkits are available
toolkit:
software: spark
version: 2.2.0
version: 2.3.0
# Which environment is needed for spark: anaconda, r, miniconda
environment: {environment}
# Optional version for the environment
Expand All @@ -16,7 +16,7 @@ toolkit:


# vm_size: <vm-size, see available options here: https://azure.microsoft.com/en-us/pricing/details/virtual-machines/linux/>
vm_size: standard_a2
vm_size: standard_f2

# size: <number of dedicated nodes in the cluster, note that clusters must contain all dedicated or all low priority nodes>
size: 2
Expand All @@ -39,11 +39,13 @@ username: spark

# Enable plugins
plugins:
# - name: spark_ui_proxy
# - name: jupyterlab
# - name: jupyter
# - name: hdfs
# - name: rstudio_server
# - name: spark_ui_proxy
# - name: openblas
# - name: nvblas

# Allow master node to also be a worker <true/false> (Default: true)
# worker_on_master: true
Expand Down
2 changes: 1 addition & 1 deletion docker-image/base/spark1.6.3/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ RUN apt-get clean \
&& cd spark \
&& git checkout tags/v${SPARK_VERSION_KEY} \
&& export MAVEN_OPTS="-Xmx3g -XX:ReservedCodeCacheSize=1024m" \
&& ./make-distribution.sh --name custom-spark --tgz -Phive -Phive-thriftserver -Dhadoop.version=${HADOOP_VERSION} -Phadoop-2.6 -DskipTests \
&& ./make-distribution.sh --name custom-spark --tgz -Pnetlib-lgpl -Phive -Phive-thriftserver -Dhadoop.version=${HADOOP_VERSION} -Phadoop-2.6 -DskipTests \
&& tar -xvzf /spark/spark-${SPARK_VERSION_KEY}-bin-custom-spark.tgz --directory=/home \
&& ln -s "/home/spark-${SPARK_VERSION_KEY}-bin-custom-spark" /home/spark-current \
&& rm -rf /spark \
Expand Down
2 changes: 1 addition & 1 deletion docker-image/base/spark2.1.0/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ RUN apt-get clean \
&& cd spark \
&& git checkout tags/v${SPARK_VERSION_KEY} \
&& export MAVEN_OPTS="-Xmx3g -XX:ReservedCodeCacheSize=1024m" \
&& ./dev/make-distribution.sh --name custom-spark --pip --tgz -Phive -Phive-thriftserver -Dhadoop.version=${HADOOP_VERSION} -DskipTests \
&& ./dev/make-distribution.sh --name custom-spark --pip --tgz -Pnetlib-lgpl -Phive -Phive-thriftserver -Dhadoop.version=${HADOOP_VERSION} -DskipTests \
&& tar -xvzf /spark/spark-${SPARK_VERSION_KEY}-bin-custom-spark.tgz --directory=/home \
&& ln -s "/home/spark-${SPARK_VERSION_KEY}-bin-custom-spark" /home/spark-current \
&& rm -rf /spark \
Expand Down
2 changes: 1 addition & 1 deletion docker-image/base/spark2.2.0/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ RUN apt-get clean \
&& cd spark \
&& git checkout tags/v${SPARK_VERSION_KEY} \
&& export MAVEN_OPTS="-Xmx3g -XX:ReservedCodeCacheSize=1024m" \
&& ./dev/make-distribution.sh --name custom-spark --pip --tgz -Phive -Phive-thriftserver -Dhadoop.version=${HADOOP_VERSION} -DskipTests \
&& ./dev/make-distribution.sh --name custom-spark --pip --tgz -Pnetlib-lgpl -Phive -Phive-thriftserver -Dhadoop.version=${HADOOP_VERSION} -DskipTests \
&& tar -xvzf /spark/spark-${SPARK_VERSION_KEY}-bin-custom-spark.tgz --directory=/home \
&& ln -s "/home/spark-${SPARK_VERSION_KEY}-bin-custom-spark" /home/spark-current \
&& rm -rf /spark \
Expand Down
2 changes: 1 addition & 1 deletion docker-image/base/spark2.3.0/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ RUN apt-get clean \
&& cd spark \
&& git checkout tags/v${SPARK_VERSION_KEY} \
&& export MAVEN_OPTS="-Xmx3g -XX:ReservedCodeCacheSize=1024m" \
&& ./dev/make-distribution.sh --name custom-spark --pip --tgz -Phive -Phive-thriftserver -Dhadoop.version=${HADOOP_VERSION} -DskipTests \
&& ./dev/make-distribution.sh --name custom-spark --pip --tgz -Pnetlib-lgpl -Phive -Phive-thriftserver -Dhadoop.version=${HADOOP_VERSION} -DskipTests \
&& tar -xvzf /spark/spark-${SPARK_VERSION_KEY}-bin-custom-spark.tgz --directory=/home \
&& ln -s "/home/spark-${SPARK_VERSION_KEY}-bin-custom-spark" /home/spark-current \
&& rm -rf /spark \
Expand Down
20 changes: 11 additions & 9 deletions docker-image/gpu/spark1.6.3/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,14 @@ FROM aztk/spark:v0.1.0-spark1.6.3-base

LABEL com.nvidia.volumes.needed="nvidia_driver"

RUN NVIDIA_GPGKEY_SUM=d1be581509378368edeec8c1eb2958702feedf3bc3d17011adbf24efacce4ab5 && \
RUN apt-get update && apt-get install -y --no-install-recommends ca-certificates apt-transport-https gnupg-curl && \
rm -rf /var/lib/apt/lists/* && \
NVIDIA_GPGKEY_SUM=d1be581509378368edeec8c1eb2958702feedf3bc3d17011adbf24efacce4ab5 && \
NVIDIA_GPGKEY_FPR=ae09fe4bbd223a84b2ccfce3f60f4b3d7fa2af80 && \
apt-key adv --fetch-keys http://developer.download.nvidia.com/compute/cuda/repos/ubuntu1604/x86_64/7fa2af80.pub && \
apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1604/x86_64/7fa2af80.pub && \
apt-key adv --export --no-emit-version -a $NVIDIA_GPGKEY_FPR | tail -n +5 > cudasign.pub && \
echo "$NVIDIA_GPGKEY_SUM cudasign.pub" | sha256sum -c --strict - && rm cudasign.pub && \
echo "deb http://developer.download.nvidia.com/compute/cuda/repos/ubuntu1604/x86_64 /" > /etc/apt/sources.list.d/cuda.list
echo "deb https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1604/x86_64 /" > /etc/apt/sources.list.d/cuda.list

# CUDA
ENV CUDA_VERSION 8.0.61
Expand Down Expand Up @@ -45,9 +47,13 @@ ENV LD_LIBRARY_PATH /usr/local/nvidia/lib:/usr/local/nvidia/lib64

ENV LIBRARY_PATH /usr/local/cuda/lib64/stubs:${LIBRARY_PATH}

# nvidia-container-runtime
ENV NVIDIA_VISIBLE_DEVICES all
ENV NVIDIA_DRIVER_CAPABILITIES compute,utility
ENV NVIDIA_REQUIRE_CUDA "cuda>=8.0"

# cuDNN
RUN echo "deb http://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1604/x86_64 /" > /etc/apt/sources.list.d/nvidia-ml.list
RUN echo "deb https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1604/x86_64 /" > /etc/apt/sources.list.d/nvidia-ml.list

ENV CUDNN_VERSION 6.0.21

Expand All @@ -72,8 +78,4 @@ ENV NUMBAPRO_LIBDEVICE /usr/local/cuda/nvvm/libdevice/
ENV NUMBAPRO_NVVM /usr/local/cuda-8.0/nvvm/lib64/libnvvm.so
ENV NUMBAPRO_CUDALIB /usr/local/cuda-8.0/targets/x86_64-linux/lib/

# # Tensorflow
# RUN pip install --upgrade tensorflow-gpu

WORKDIR $SPARK_HOME
CMD ["bin/spark-class", "org.apache.spark.deploy.master.Master"]
CMD ["/bin/bash"]
20 changes: 11 additions & 9 deletions docker-image/gpu/spark2.1.0/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,14 @@ FROM aztk/spark:v0.1.0-spark2.1.0-base

LABEL com.nvidia.volumes.needed="nvidia_driver"

RUN NVIDIA_GPGKEY_SUM=d1be581509378368edeec8c1eb2958702feedf3bc3d17011adbf24efacce4ab5 && \
RUN apt-get update && apt-get install -y --no-install-recommends ca-certificates apt-transport-https gnupg-curl && \
rm -rf /var/lib/apt/lists/* && \
NVIDIA_GPGKEY_SUM=d1be581509378368edeec8c1eb2958702feedf3bc3d17011adbf24efacce4ab5 && \
NVIDIA_GPGKEY_FPR=ae09fe4bbd223a84b2ccfce3f60f4b3d7fa2af80 && \
apt-key adv --fetch-keys http://developer.download.nvidia.com/compute/cuda/repos/ubuntu1604/x86_64/7fa2af80.pub && \
apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1604/x86_64/7fa2af80.pub && \
apt-key adv --export --no-emit-version -a $NVIDIA_GPGKEY_FPR | tail -n +5 > cudasign.pub && \
echo "$NVIDIA_GPGKEY_SUM cudasign.pub" | sha256sum -c --strict - && rm cudasign.pub && \
echo "deb http://developer.download.nvidia.com/compute/cuda/repos/ubuntu1604/x86_64 /" > /etc/apt/sources.list.d/cuda.list
echo "deb https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1604/x86_64 /" > /etc/apt/sources.list.d/cuda.list

# CUDA
ENV CUDA_VERSION 8.0.61
Expand Down Expand Up @@ -45,9 +47,13 @@ ENV LD_LIBRARY_PATH /usr/local/nvidia/lib:/usr/local/nvidia/lib64

ENV LIBRARY_PATH /usr/local/cuda/lib64/stubs:${LIBRARY_PATH}

# nvidia-container-runtime
ENV NVIDIA_VISIBLE_DEVICES all
ENV NVIDIA_DRIVER_CAPABILITIES compute,utility
ENV NVIDIA_REQUIRE_CUDA "cuda>=8.0"

# cuDNN
RUN echo "deb http://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1604/x86_64 /" > /etc/apt/sources.list.d/nvidia-ml.list
RUN echo "deb https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1604/x86_64 /" > /etc/apt/sources.list.d/nvidia-ml.list

ENV CUDNN_VERSION 6.0.21

Expand All @@ -72,8 +78,4 @@ ENV NUMBAPRO_LIBDEVICE /usr/local/cuda/nvvm/libdevice/
ENV NUMBAPRO_NVVM /usr/local/cuda-8.0/nvvm/lib64/libnvvm.so
ENV NUMBAPRO_CUDALIB /usr/local/cuda-8.0/targets/x86_64-linux/lib/

# # Tensorflow
# RUN pip install --upgrade tensorflow-gpu

WORKDIR $SPARK_HOME
CMD ["bin/spark-class", "org.apache.spark.deploy.master.Master"]
CMD ["/bin/bash"]
20 changes: 11 additions & 9 deletions docker-image/gpu/spark2.2.0/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,14 @@ FROM aztk/spark:v0.1.0-spark2.2.0-base

LABEL com.nvidia.volumes.needed="nvidia_driver"

RUN NVIDIA_GPGKEY_SUM=d1be581509378368edeec8c1eb2958702feedf3bc3d17011adbf24efacce4ab5 && \
RUN apt-get update && apt-get install -y --no-install-recommends ca-certificates apt-transport-https gnupg-curl && \
rm -rf /var/lib/apt/lists/* && \
NVIDIA_GPGKEY_SUM=d1be581509378368edeec8c1eb2958702feedf3bc3d17011adbf24efacce4ab5 && \
NVIDIA_GPGKEY_FPR=ae09fe4bbd223a84b2ccfce3f60f4b3d7fa2af80 && \
apt-key adv --fetch-keys http://developer.download.nvidia.com/compute/cuda/repos/ubuntu1604/x86_64/7fa2af80.pub && \
apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1604/x86_64/7fa2af80.pub && \
apt-key adv --export --no-emit-version -a $NVIDIA_GPGKEY_FPR | tail -n +5 > cudasign.pub && \
echo "$NVIDIA_GPGKEY_SUM cudasign.pub" | sha256sum -c --strict - && rm cudasign.pub && \
echo "deb http://developer.download.nvidia.com/compute/cuda/repos/ubuntu1604/x86_64 /" > /etc/apt/sources.list.d/cuda.list
echo "deb https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1604/x86_64 /" > /etc/apt/sources.list.d/cuda.list

# CUDA
ENV CUDA_VERSION 8.0.61
Expand Down Expand Up @@ -45,9 +47,13 @@ ENV LD_LIBRARY_PATH /usr/local/nvidia/lib:/usr/local/nvidia/lib64

ENV LIBRARY_PATH /usr/local/cuda/lib64/stubs:${LIBRARY_PATH}

# nvidia-container-runtime
ENV NVIDIA_VISIBLE_DEVICES all
ENV NVIDIA_DRIVER_CAPABILITIES compute,utility
ENV NVIDIA_REQUIRE_CUDA "cuda>=8.0"

# cuDNN
RUN echo "deb http://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1604/x86_64 /" > /etc/apt/sources.list.d/nvidia-ml.list
RUN echo "deb https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1604/x86_64 /" > /etc/apt/sources.list.d/nvidia-ml.list

ENV CUDNN_VERSION 6.0.21

Expand All @@ -72,8 +78,4 @@ ENV NUMBAPRO_LIBDEVICE /usr/local/cuda/nvvm/libdevice/
ENV NUMBAPRO_NVVM /usr/local/cuda-8.0/nvvm/lib64/libnvvm.so
ENV NUMBAPRO_CUDALIB /usr/local/cuda-8.0/targets/x86_64-linux/lib/

# # Tensorflow
# RUN pip install --upgrade tensorflow-gpu

WORKDIR $SPARK_HOME
CMD ["bin/spark-class", "org.apache.spark.deploy.master.Master"]
CMD ["/bin/bash"]
20 changes: 11 additions & 9 deletions docker-image/gpu/spark2.3.0/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,14 @@ FROM aztk/spark:v0.1.0-spark2.3.0-base

LABEL com.nvidia.volumes.needed="nvidia_driver"

RUN NVIDIA_GPGKEY_SUM=d1be581509378368edeec8c1eb2958702feedf3bc3d17011adbf24efacce4ab5 && \
RUN apt-get update && apt-get install -y --no-install-recommends ca-certificates apt-transport-https gnupg-curl && \
rm -rf /var/lib/apt/lists/* && \
NVIDIA_GPGKEY_SUM=d1be581509378368edeec8c1eb2958702feedf3bc3d17011adbf24efacce4ab5 && \
NVIDIA_GPGKEY_FPR=ae09fe4bbd223a84b2ccfce3f60f4b3d7fa2af80 && \
apt-key adv --fetch-keys http://developer.download.nvidia.com/compute/cuda/repos/ubuntu1604/x86_64/7fa2af80.pub && \
apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1604/x86_64/7fa2af80.pub && \
apt-key adv --export --no-emit-version -a $NVIDIA_GPGKEY_FPR | tail -n +5 > cudasign.pub && \
echo "$NVIDIA_GPGKEY_SUM cudasign.pub" | sha256sum -c --strict - && rm cudasign.pub && \
echo "deb http://developer.download.nvidia.com/compute/cuda/repos/ubuntu1604/x86_64 /" > /etc/apt/sources.list.d/cuda.list
echo "deb https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1604/x86_64 /" > /etc/apt/sources.list.d/cuda.list

# CUDA
ENV CUDA_VERSION 8.0.61
Expand Down Expand Up @@ -45,9 +47,13 @@ ENV LD_LIBRARY_PATH /usr/local/nvidia/lib:/usr/local/nvidia/lib64

ENV LIBRARY_PATH /usr/local/cuda/lib64/stubs:${LIBRARY_PATH}

# nvidia-container-runtime
ENV NVIDIA_VISIBLE_DEVICES all
ENV NVIDIA_DRIVER_CAPABILITIES compute,utility
ENV NVIDIA_REQUIRE_CUDA "cuda>=8.0"

# cuDNN
RUN echo "deb http://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1604/x86_64 /" > /etc/apt/sources.list.d/nvidia-ml.list
RUN echo "deb https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1604/x86_64 /" > /etc/apt/sources.list.d/nvidia-ml.list

ENV CUDNN_VERSION 6.0.21

Expand All @@ -72,8 +78,4 @@ ENV NUMBAPRO_LIBDEVICE /usr/local/cuda/nvvm/libdevice/
ENV NUMBAPRO_NVVM /usr/local/cuda-8.0/nvvm/lib64/libnvvm.so
ENV NUMBAPRO_CUDALIB /usr/local/cuda-8.0/targets/x86_64-linux/lib/

# # Tensorflow
# RUN pip install --upgrade tensorflow-gpu

WORKDIR $SPARK_HOME
CMD ["bin/spark-class", "org.apache.spark.deploy.master.Master"]
CMD ["/bin/bash"]

0 comments on commit 603a413

Please sign in to comment.