From 32a514b555d3fb3af91ba3e448ddd207f408f32a Mon Sep 17 00:00:00 2001 From: Yuqing Yang Date: Fri, 5 Jul 2019 15:28:17 +0800 Subject: [PATCH] Sdk 0.4.01 - update documents and versioning (#3051) * update documents and versions * remove developing branch name in the document * fix bugs (1) integer password (2) notepad editing; update documents about selecting python version --- contrib/python-sdk/README.md | 32 +++++++++++++------ contrib/python-sdk/openpaisdk/__init__.py | 5 +-- contrib/python-sdk/openpaisdk/cli_factory.py | 7 ++-- contrib/python-sdk/openpaisdk/cluster.py | 26 +++++++++++++-- contrib/python-sdk/openpaisdk/command_line.py | 1 + contrib/python-sdk/setup.py | 2 +- 6 files changed, 56 insertions(+), 17 deletions(-) diff --git a/contrib/python-sdk/README.md b/contrib/python-sdk/README.md index db46146406..d63ed28eba 100644 --- a/contrib/python-sdk/README.md +++ b/contrib/python-sdk/README.md @@ -33,10 +33,12 @@ Besides above benefits, this project also provides powerful runtime support, whi - [How to preview the generated job config but not submit it](#How-to-preview-the-generated-job-config-but-not-submit-it) - [`Jupyter` notebook](#Jupyter-notebook) - [How to run a local notebook with remote resources](#How-to-run-a-local-notebook-with-remote-resources) - - [How to launch a remote jupyter server and connect it](#How-to-launch-a-remote-jupyter-server-and-connect-it) + - [How to launch a remote `Jupyter` server and connect it](#How-to-launch-a-remote-Jupyter-server-and-connect-it) - [Other FAQ of CLI](#Other-FAQ-of-CLI) - [How to select a cluster to use until I change it](#How-to-select-a-cluster-to-use-until-I-change-it) - [How to simplify the command](#How-to-simplify-the-command) + - [How to install a different version of SDK](#How-to-install-a-different-version-of-SDK) + - [How to specify the `python` environment I want to use in the job container](#How-to-specify-the-python-environment-I-want-to-use-in-the-job-container) - [Python 
binding](#Python-binding) - [Cluster management](#Cluster-management) - [Job management](#Job-management) @@ -55,20 +57,16 @@ We provide installing method leveraging `pip install` ```bash python -m pip install --upgrade pip -pip install -U -e "git+https://github.com/Microsoft/pai@sdk-release-v0.4.00#egg=openpaisdk&subdirectory=contrib/python-sdk" +pip install -U -e "git+https://github.com/Microsoft/pai@master#egg=openpaisdk&subdirectory=contrib/python-sdk" ``` -The `sdk-release-v0.4.00` is the branch name which containing the source code of SDK. User may change it to another branch to install another version of the package. - -After installing, please verify by CLI or python binding as below. +Refer to [How to install a different version of SDK](#How-to-install-a-different-version-of-SDK) for more details about installing. After installing, please verify by CLI or python binding as below. ```bash opai -h python -c "from openpaisdk import __version__; print(__version__)" ``` -And you may also change it to another branch (only take effect in the job container) by `opai set sdk-branch=`. - ### Dependencies - The package requires python3 (mainly because of `type hinting`), and we only tested it on `py3.5+` environment. _Only commands `job sub` and `job notebook` require installing this project inside container, others don't make any constraints of `python` version in the docker container._ @@ -269,9 +267,9 @@ This command requires options as the `opai job sub` does. This command would - _Local_ - wait and query the job state until its status to be `SUCCEEDED` - _Local_ - download `` to local and open it with web browser -### How to launch a remote jupyter server and connect it +### How to launch a remote `Jupyter` server and connect it -Sometimes user may want to launch a remote jupyter server and do some work on it interactively. To do this, just add `--interactive` in `job notebook` command. 
After submitting the job, a link like `http://x.x.x.x:port/notebooks/` will be opened in your browser. Since it takes a while to start the container, please wait and refresh the page until the notebook opens. Use the default token `abcd` (unless it is overriden by `--token `) to login the notebook. +Sometimes a user may want to launch a remote `Jupyter` server and work on it interactively. To do this, just add `--interactive` to the `job notebook` command. After submitting the job, a link like `http://x.x.x.x:port/notebooks/` will be opened in your browser. Since it takes a while to start the container, please wait and refresh the page until the notebook opens. Use the default token `abcd` (unless it is overridden by `--token <token>`) to log in to the notebook. ## Other FAQ of CLI @@ -302,7 +300,21 @@ Some commonly used default variables includes - `workspace=` - `sdk-branch=` which branch to use when install the sdk in job container +### How to install a different version of SDK + +Users can easily switch to another version of the SDK, both in the local environment and in the job container. In the local environment, just replace `<your/branch>` in the command below with the branch you want (e.g. `pai-0.14.y` for the `OpenPAI` end-June release, or a feature development branch for the canary version). + +```bash +pip install -U -e "git+https://github.com/Microsoft/pai@<your/branch>#egg=openpaisdk&subdirectory=contrib/python-sdk" +``` + +To debug a local update, just use `pip install -U -e your/path/to/python-sdk` (the directory that contains `setup.py`). + +For jobs submitted by the SDK or the command line tool, the branch specified by `opai set sdk-branch=<your/branch>` is used first. If it is not specified, the `master` branch is used. + +### How to specify the `python` environment I want to use in the job container +In some cases, there is more than one `python` environment in a docker image. For example, there are both `python` and `python3` environments in `openpai/pai.example.keras.tensorflow`. Users can add `--python <path/to/python>` (e.g.
`--python python3`) in the command `job notebook` or `job sub` to use the specific `python` environment. Refer to [notebook example](examples/1-submit-and-query-via-command-line.ipynb) for more details. # Python binding @@ -324,7 +336,7 @@ client.jobs(name) client.rest_api_submit(job_config) ``` -- [x] the `Cluster` class has methods to access storage (through WebHDFS only for this version) +- [x] the `Cluster` class has methods to access storage (through `WebHDFS` only for this version) ```python Cluster(...).storage.upload/download(...) diff --git a/contrib/python-sdk/openpaisdk/__init__.py b/contrib/python-sdk/openpaisdk/__init__.py index 7a4f9316f9..2b39601af5 100644 --- a/contrib/python-sdk/openpaisdk/__init__.py +++ b/contrib/python-sdk/openpaisdk/__init__.py @@ -9,10 +9,11 @@ __version__ = '0.4.00' -__sdk_branch__ = 'sdk-release-v0.4.00' +__sdk_branch__ = 'master' -def get_install_uri(ver: str=__sdk_branch__): +def get_install_uri(ver: str=None): + ver = __sdk_branch__ if not ver else ver return '-e "git+https://github.com/Microsoft/pai@{}#egg=openpaisdk&subdirectory=contrib/python-sdk"'.format(ver) diff --git a/contrib/python-sdk/openpaisdk/cli_factory.py b/contrib/python-sdk/openpaisdk/cli_factory.py index 82791719c0..8c7c399f5a 100644 --- a/contrib/python-sdk/openpaisdk/cli_factory.py +++ b/contrib/python-sdk/openpaisdk/cli_factory.py @@ -39,6 +39,7 @@ def __init__(self, action: str, allowed_actions: dict): self.do_action = getattr(self, "do_action_" + suffix, None) self.__job__ = Job() self.__clusters__ = ClusterList() + self.disable_saving = dict() def restore(self, args): if getattr(args, 'job_name', None): @@ -47,8 +48,10 @@ def restore(self, args): return self def store(self, args): - self.__job__.save() - self.__clusters__.save() + if not self.disable_saving.get("job", False): + self.__job__.save() + if not self.disable_saving.get("clusters", False): + self.__clusters__.save() return self diff --git a/contrib/python-sdk/openpaisdk/cluster.py 
b/contrib/python-sdk/openpaisdk/cluster.py index d1b153cde6..7c9f54795d 100644 --- a/contrib/python-sdk/openpaisdk/cluster.py +++ b/contrib/python-sdk/openpaisdk/cluster.py @@ -23,6 +23,20 @@ def new(c_dic: dict): dic.update(c_dic) return dic + @staticmethod + def make_string(pth: str, target: str, iters = (list, dict)): + flag = True + assert isinstance(target, iters), "not supported type %s (%s)" % (pth, type(target)) + for i in (target.keys() if isinstance(target, dict) else range(len(target))): + pth_next = "%s[%s]" % (pth, i) + if isinstance(target[i], iters): + flag = Cluster.make_string(pth_next, target[i], iters) and flag  # recurse first: `flag and ...` would skip nested values once flag is False + elif not (target[i] is None or isinstance(target[i], str)): + __logger__.warn('only string is allowed in the cluster configuration, %s is %s, replace it by str()', pth_next, type(target[i])) + target[i] = str(target[i]) + flag = False + return flag + @staticmethod def attach_storage(c_dic: dict, storage: dict): if "user" in storage and not storage["user"]: @@ -31,9 +45,11 @@ def attach_storage(c_dic: dict, storage: dict): @staticmethod def validate(cluster: dict): + assert isinstance(cluster, dict), "cluster configuration should be a dict" + assert isinstance(cluster["storages"], list), "storages should be a list" + Cluster.make_string(cluster["cluster_alias"], cluster) assert cluster["pai_uri"].startswith("http://") or cluster["pai_uri"].startswith("https://"), "pai_uri should be a uri starting with http(s)://" assert cluster["user"], "cluster should have a cluster" - assert isinstance(cluster["storages"], list), "storages should be list" return cluster @@ -45,9 +61,15 @@ class ClusterList: def __init__(self): self.clusters = [] + @staticmethod + def validate(clusters: list): + assert isinstance(clusters, list), "contents in %s should be a list" % __cluster_config_file__ + [Cluster.validate(c) for c in clusters] + return clusters + def load(self, fname: str=__cluster_config_file__): self.clusters = from_file(fname, default=[]) - assert 
isinstance(self.clusters, list), "contents in %s should be a list" % __cluster_config_file__ + ClusterList.validate(self.clusters) return self def save(self): diff --git a/contrib/python-sdk/openpaisdk/command_line.py b/contrib/python-sdk/openpaisdk/command_line.py index 456f935484..114c313b6e 100644 --- a/contrib/python-sdk/openpaisdk/command_line.py +++ b/contrib/python-sdk/openpaisdk/command_line.py @@ -60,6 +60,7 @@ def check_arguments_edit(self, args): def do_action_edit(self, args): run_command([args.editor, __cluster_config_file__]) + self.disable_saving["clusters"] = True def define_arguments_list(self, parser): cli_add_arguments(parser, []) diff --git a/contrib/python-sdk/setup.py b/contrib/python-sdk/setup.py index 97c3753f12..7689dc8eb2 100644 --- a/contrib/python-sdk/setup.py +++ b/contrib/python-sdk/setup.py @@ -1,7 +1,7 @@ from setuptools import setup setup(name='openpaisdk', - version='0.1', + version='0.4.00', description='A simple SDK for OpenPAI', url='https://github.com/microsoft/pai/contrib/python-sdk', packages=['openpaisdk'],