From 1527929e303bfb9db7ec3f01650dc0e160413918 Mon Sep 17 00:00:00 2001 From: Jacob Freck Date: Mon, 21 May 2018 13:22:55 -0700 Subject: [PATCH] Feature: TensorflowOnSpark python plugin (#525) * initial commit * update * update * add gpu support * remove comment * change class to function * fix merge issue * add some docs --- .../models/plugins/internal/plugin_manager.py | 1 + aztk/spark/models/plugins/__init__.py | 1 + .../plugins/tensorflow_on_spark/__init__.py | 1 + .../tensorflow_on_spark/configuration.py | 17 ++++++++++++++++ .../tensorflow_on_spark.sh | 20 +++++++++++++++++++ aztk_cli/config/cluster.yaml | 1 + docs/15-plugins.md | 5 +++-- 7 files changed, 44 insertions(+), 2 deletions(-) create mode 100644 aztk/spark/models/plugins/tensorflow_on_spark/__init__.py create mode 100644 aztk/spark/models/plugins/tensorflow_on_spark/configuration.py create mode 100644 aztk/spark/models/plugins/tensorflow_on_spark/tensorflow_on_spark.sh diff --git a/aztk/models/plugins/internal/plugin_manager.py b/aztk/models/plugins/internal/plugin_manager.py index 8789dab1..cebb6d62 100644 --- a/aztk/models/plugins/internal/plugin_manager.py +++ b/aztk/models/plugins/internal/plugin_manager.py @@ -23,6 +23,7 @@ class PluginManager: hdfs=plugins.HDFSPlugin, simple=plugins.SimplePlugin, spark_ui_proxy=plugins.SparkUIProxyPlugin, + tensorflow_on_spark=plugins.TensorflowOnSparkPlugin, openblas=plugins.OpenBLASPlugin, nvblas=plugins.NvBLASPlugin, ) diff --git a/aztk/spark/models/plugins/__init__.py b/aztk/spark/models/plugins/__init__.py index a67db47f..9bf81f09 100644 --- a/aztk/spark/models/plugins/__init__.py +++ b/aztk/spark/models/plugins/__init__.py @@ -5,5 +5,6 @@ from .rstudio_server import RStudioServerPlugin from .simple import SimplePlugin from .spark_ui_proxy import SparkUIProxyPlugin +from .tensorflow_on_spark import TensorflowOnSparkPlugin from .openblas import OpenBLASPlugin from .nvblas import NvBLASPlugin diff --git 
a/aztk/spark/models/plugins/tensorflow_on_spark/__init__.py b/aztk/spark/models/plugins/tensorflow_on_spark/__init__.py
new file mode 100644
index 00000000..2ec26f31
--- /dev/null
+++ b/aztk/spark/models/plugins/tensorflow_on_spark/__init__.py
@@ -0,0 +1 @@
+from .configuration import *
diff --git a/aztk/spark/models/plugins/tensorflow_on_spark/configuration.py b/aztk/spark/models/plugins/tensorflow_on_spark/configuration.py
new file mode 100644
index 00000000..e5aea89f
--- /dev/null
+++ b/aztk/spark/models/plugins/tensorflow_on_spark/configuration.py
@@ -0,0 +1,17 @@
+import os
+from aztk.models.plugins.plugin_configuration import PluginConfiguration, PluginPort, PluginTargetRole
+from aztk.models.plugins.plugin_file import PluginFile
+from aztk.utils import constants
+
+dir_path = os.path.dirname(os.path.realpath(__file__))
+
+
+def TensorflowOnSparkPlugin():
+    return PluginConfiguration(
+        name="tensorflow_on_spark",
+        target_role=PluginTargetRole.Master,
+        execute="tensorflow_on_spark.sh",
+        files=[
+            PluginFile("tensorflow_on_spark.sh", os.path.join(dir_path, "tensorflow_on_spark.sh")),
+        ],
+    )
diff --git a/aztk/spark/models/plugins/tensorflow_on_spark/tensorflow_on_spark.sh b/aztk/spark/models/plugins/tensorflow_on_spark/tensorflow_on_spark.sh
new file mode 100644
index 00000000..a3fae52b
--- /dev/null
+++ b/aztk/spark/models/plugins/tensorflow_on_spark/tensorflow_on_spark.sh
@@ -0,0 +1,20 @@
+#!/bin/bash
+
+# This plugin requires HDFS to be enabled and on the path (HADOOP_HOME is read below)
+
+# setup TensorFlowOnSpark: clone it, then persist TFoS_HOME (the clone's path) for future shells
+git clone https://github.com/yahoo/TensorFlowOnSpark.git
+cd TensorFlowOnSpark || exit 1
+export TFoS_HOME=$(pwd)
+echo "export TFoS_HOME=$TFoS_HOME" >> ~/.bashrc
+
+if [ "$AZTK_GPU_ENABLED" = "true" ]; then
+    pip install tensorflow-gpu
+    pip install tensorflowonspark
+else
+    pip install tensorflow-cpu
+    pip install tensorflowonspark
+fi
+
+# add the directory holding libhdfs.so to the library path (LD_LIBRARY_PATH lists directories, not files)
+echo "export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$HADOOP_HOME/lib/native" >> ~/.bashrc
diff --git
a/aztk_cli/config/cluster.yaml b/aztk_cli/config/cluster.yaml index 929f3688..2bd00fe5 100644 --- a/aztk_cli/config/cluster.yaml +++ b/aztk_cli/config/cluster.yaml @@ -44,6 +44,7 @@ plugins: # - name: hdfs # - name: rstudio_server # - name: spark_ui_proxy + # - name: tensorflow_on_spark # - name: openblas # - name: nvblas diff --git a/docs/15-plugins.md b/docs/15-plugins.md index 51f669a0..c6d9573c 100644 --- a/docs/15-plugins.md +++ b/docs/15-plugins.md @@ -13,7 +13,9 @@ Currently the following plugins are supported: - Jupyter - HDFS - RStudioServer -- Spark UI Proxy +- TensorflowOnSpark +- OpenBLAS +- NvBLAS ### Enable a plugin using the CLI If you are uing the `aztk` CLI and wish to enable a supported plugin, you need to update you `.aztk/cluster.yaml` configuration file. @@ -38,7 +40,6 @@ cluster_config = ClusterConfiguration( ...# Other config, plugins=[ JupyterPlugin(), - RStudioServerPlugin(version="1.1.383"), HDFSPlugin(), ] )